DATA
Pop sive over time
dataset
data <- read.table(file=here::here("data", "BE_Census_Data.csv"), sep=",", header=TRUE)
head(data)
## Block Old_Label Label Population Genetic_Diversity
## 1 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High1 High1 High
## 2 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High2 High2 High
## 3 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High3 High3 High
## 4 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High4 High4 High
## 5 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High5 High5 High
## 6 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High6 High6 High
## Generation_Parents Generation_Eggs Date_Census Date_Start_Eggs Date_End_Eggs
## 1 0 1 2/17/21 2/17/21 2/18/21
## 2 0 1 2/17/21 2/17/21 2/18/21
## 3 0 1 2/17/21 2/17/21 2/18/21
## 4 0 1 2/17/21 2/17/21 2/18/21
## 5 0 1 2/17/21 2/17/21 2/18/21
## 6 0 1 2/17/21 2/17/21 2/18/21
## Image_Box1 Image_Box2 MC_Box1 MC_Box2 MC_Total_Adults HC_Box1 HC_Box2
## 1 DSC_0155 <NA> NA NA 101.9 NA NA
## 2 DSC_0156 <NA> NA NA 104.7 NA NA
## 3 DSC_0157 <NA> NA NA 105.8 NA NA
## 4 DSC_0158 <NA> NA NA 101.9 NA NA
## 5 DSC_0159 <NA> NA NA 102.2 NA NA
## 6 DSC_0160 <NA> NA NA 101.7 NA NA
## HC_Total_Adults Nb_parents Growth_rate First_Throw Extinction_finalstatus
## 1 100 NA NA no no
## 2 100 NA NA no no
## 3 100 NA NA no no
## 4 100 NA NA no no
## 5 100 NA NA no no
## 6 100 NA NA no no
## Less_than_5
## 1 no
## 2 no
## 3 no
## 4 no
## 5 no
## 6 no
tail(data)
## Block Old_Label Label Population
## 571 Block5 B5 - Backup Fam H 8/25 BE B5 | G6 Med 19 Med19
## 572 Block5 B5 - Backup Fam I 8/25 BE B5 | G6 Med 20 Med20
## 573 Block5 B5-Backup Fam A 8/25 BE B5 | G6 Med 21 Med21
## 574 Block5 B5-Backup Fam B 8/25 BE B5 | G6 Med 22 Med22
## 575 Block5 B5-Backup Fam C 8/25 BE B5 | G6 Med 23 Med23
## 576 Block5 B5-Backup Fam D 8/25 BE B5 | G6 Med 24 Med24
## Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 571 Med 5 6 8/25/21
## 572 Med 5 6 8/25/21
## 573 Med 5 6 8/25/21
## 574 Med 5 6 8/25/21
## 575 Med 5 6 8/25/21
## 576 Med 5 6 8/25/21
## Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2 MC_Box1 MC_Box2
## 571 8/25/21 8/26/21 DSC_0273 DSC_0274 2.00000 2.00000
## 572 8/25/21 8/26/21 <NA> <NA> NA NA
## 573 8/25/21 8/26/21 DSC_0277 DSC_0278 29.76978 32.92378
## 574 8/25/21 8/26/21 <NA> <NA> NA NA
## 575 8/25/21 8/26/21 DSC_0267 DSC_0268 25.31021 46.10092
## 576 8/25/21 8/26/21 DSC_0283 DSC_0284 71.09684 55.37443
## MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 571 4.0 2 2 4 8 0.500000
## 572 0.0 NA NA NA 0 NA
## 573 62.7 23 23 46 35 1.314286
## 574 0.0 NA NA NA NA NA
## 575 71.4 20 36 56 40 1.400000
## 576 126.5 76 57 133 41 3.243902
## First_Throw Extinction_finalstatus Less_than_5
## 571 no no yes
## 572 extinct yes yes
## 573 no no no
## 574 extinct yes yes
## 575 no no no
## 576 no no no
dim(data)
## [1] 576 23
summary(data)
## Block Old_Label Label Population
## Length:576 Length:576 Length:576 Length:576
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## Length:576 Min. :0.0 Min. :1.0 Length:576
## Class :character 1st Qu.:1.0 1st Qu.:2.0 Class :character
## Mode :character Median :2.5 Median :3.5 Mode :character
## Mean :2.5 Mean :3.5
## 3rd Qu.:4.0 3rd Qu.:5.0
## Max. :5.0 Max. :6.0
##
## Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2
## Length:576 Length:576 Length:576 Length:576
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## MC_Box1 MC_Box2 MC_Total_Adults HC_Box1
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 26.52 1st Qu.: 25.99 1st Qu.: 51.05 1st Qu.: 22.00
## Median : 58.87 Median : 54.29 Median :101.50 Median : 56.50
## Mean : 75.88 Mean : 70.57 Mean :128.37 Mean : 72.06
## 3rd Qu.:106.93 3rd Qu.: 99.40 3rd Qu.:170.25 3rd Qu.:101.00
## Max. :327.40 Max. :299.00 Max. :514.40 Max. :288.00
## NA's :142 NA's :141 NA's :5 NA's :144
## HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## Min. : 0.00 Min. : 0.0 Min. : 0.00 Min. :0.0000
## 1st Qu.: 25.00 1st Qu.: 62.0 1st Qu.: 67.25 1st Qu.:0.4183
## Median : 52.00 Median :100.0 Median :100.00 Median :0.9386
## Mean : 68.08 Mean :131.8 Mean :138.30 Mean :1.4482
## 3rd Qu.: 96.75 3rd Qu.:169.0 3rd Qu.:188.00 3rd Qu.:2.3587
## Max. :290.00 Max. :499.0 Max. :499.00 Max. :6.8571
## NA's :142 NA's :43 NA's :122 NA's :142
## First_Throw Extinction_finalstatus Less_than_5
## Length:576 Length:576 Length:576
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
#Remove populations killed due to the pathogen
data_status <- data.frame(data$Population,data$Extinction_finalstatus)
data_kill<-data[data$Extinction_finalstatus=="kill",]
dim(data_kill)
## [1] 72 23
data<-data[data$Extinction_finalstatus!="kill",]
#Add pop growth rate
data$Nb_adults <- data$HC_Total_Adults
data$Lambda <- data$Nb_adults / data$Nb_parents
data$obs<-as.factor(1:nrow(data))
#summary(data)
#Remove
#Check variables
str(data)
## 'data.frame': 504 obs. of 26 variables:
## $ Block : chr "Block3" "Block3" "Block3" "Block3" ...
## $ Old_Label : chr "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
## $ Label : chr "BE B3 2/17 | G1 High1" "BE B3 2/17 | G1 High2" "BE B3 2/17 | G1 High3" "BE B3 2/17 | G1 High4" ...
## $ Population : chr "High1" "High2" "High3" "High4" ...
## $ Genetic_Diversity : chr "High" "High" "High" "High" ...
## $ Generation_Parents : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Generation_Eggs : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date_Census : chr "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
## $ Date_Start_Eggs : chr "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
## $ Date_End_Eggs : chr "2/18/21" "2/18/21" "2/18/21" "2/18/21" ...
## $ Image_Box1 : chr "DSC_0155" "DSC_0156" "DSC_0157" "DSC_0158" ...
## $ Image_Box2 : chr NA NA NA NA ...
## $ MC_Box1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ MC_Box2 : num NA NA NA NA NA NA NA NA NA NA ...
## $ MC_Total_Adults : num 102 105 106 102 102 ...
## $ HC_Box1 : int NA NA NA NA NA NA NA NA NA NA ...
## $ HC_Box2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ HC_Total_Adults : int 100 100 100 100 100 100 100 100 100 100 ...
## $ Nb_parents : num NA NA NA NA NA NA NA NA NA NA ...
## $ Growth_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ First_Throw : chr "no" "no" "no" "no" ...
## $ Extinction_finalstatus: chr "no" "no" "no" "no" ...
## $ Less_than_5 : chr "no" "no" "no" "no" ...
## $ Nb_adults : int 100 100 100 100 100 100 100 100 100 100 ...
## $ Lambda : num NA NA NA NA NA NA NA NA NA NA ...
## $ obs : Factor w/ 504 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
data$Genetic_Diversity <- as.factor(data$Genetic_Diversity)
data$Block <- as.factor(data$Block)
data$Population <- as.factor(data$Population)
data$Extinction_finalstatus <- as.factor(data$Extinction_finalstatus)
#Order levels
data$Genetic_Diversity <- plyr::revalue(data$Genetic_Diversity, c("Med"="Medium"))
data$Genetic_Diversity <- factor(data$Genetic_Diversity, levels = c("High", "Medium", "Low"))
levels(data$Genetic_Diversity)
## [1] "High" "Medium" "Low"
data<-droplevels(data) #remove previous levels
str(data)
## 'data.frame': 504 obs. of 26 variables:
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Old_Label : chr "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
## $ Label : chr "BE B3 2/17 | G1 High1" "BE B3 2/17 | G1 High2" "BE B3 2/17 | G1 High3" "BE B3 2/17 | G1 High4" ...
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 9 19 24 25 26 27 28 39 49 ...
## $ Genetic_Diversity : Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 3 3 3 ...
## $ Generation_Parents : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Generation_Eggs : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Date_Census : chr "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
## $ Date_Start_Eggs : chr "2/17/21" "2/17/21" "2/17/21" "2/17/21" ...
## $ Date_End_Eggs : chr "2/18/21" "2/18/21" "2/18/21" "2/18/21" ...
## $ Image_Box1 : chr "DSC_0155" "DSC_0156" "DSC_0157" "DSC_0158" ...
## $ Image_Box2 : chr NA NA NA NA ...
## $ MC_Box1 : num NA NA NA NA NA NA NA NA NA NA ...
## $ MC_Box2 : num NA NA NA NA NA NA NA NA NA NA ...
## $ MC_Total_Adults : num 102 105 106 102 102 ...
## $ HC_Box1 : int NA NA NA NA NA NA NA NA NA NA ...
## $ HC_Box2 : int NA NA NA NA NA NA NA NA NA NA ...
## $ HC_Total_Adults : int 100 100 100 100 100 100 100 100 100 100 ...
## $ Nb_parents : num NA NA NA NA NA NA NA NA NA NA ...
## $ Growth_rate : num NA NA NA NA NA NA NA NA NA NA ...
## $ First_Throw : chr "no" "no" "no" "no" ...
## $ Extinction_finalstatus: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Less_than_5 : chr "no" "no" "no" "no" ...
## $ Nb_adults : int 100 100 100 100 100 100 100 100 100 100 ...
## $ Lambda : num NA NA NA NA NA NA NA NA NA NA ...
## $ obs : Factor w/ 504 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
head(data)
## Block Old_Label Label Population Genetic_Diversity
## 1 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High1 High1 High
## 2 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High2 High2 High
## 3 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High3 High3 High
## 4 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High4 High4 High
## 5 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High5 High5 High
## 6 Block3 B3_All_Red_Mix BE B3 2/17 | G1 High6 High6 High
## Generation_Parents Generation_Eggs Date_Census Date_Start_Eggs Date_End_Eggs
## 1 0 1 2/17/21 2/17/21 2/18/21
## 2 0 1 2/17/21 2/17/21 2/18/21
## 3 0 1 2/17/21 2/17/21 2/18/21
## 4 0 1 2/17/21 2/17/21 2/18/21
## 5 0 1 2/17/21 2/17/21 2/18/21
## 6 0 1 2/17/21 2/17/21 2/18/21
## Image_Box1 Image_Box2 MC_Box1 MC_Box2 MC_Total_Adults HC_Box1 HC_Box2
## 1 DSC_0155 <NA> NA NA 101.9 NA NA
## 2 DSC_0156 <NA> NA NA 104.7 NA NA
## 3 DSC_0157 <NA> NA NA 105.8 NA NA
## 4 DSC_0158 <NA> NA NA 101.9 NA NA
## 5 DSC_0159 <NA> NA NA 102.2 NA NA
## 6 DSC_0160 <NA> NA NA 101.7 NA NA
## HC_Total_Adults Nb_parents Growth_rate First_Throw Extinction_finalstatus
## 1 100 NA NA no no
## 2 100 NA NA no no
## 3 100 NA NA no no
## 4 100 NA NA no no
## 5 100 NA NA no no
## 6 100 NA NA no no
## Less_than_5 Nb_adults Lambda obs
## 1 no 100 NA 1
## 2 no 100 NA 2
## 3 no 100 NA 3
## 4 no 100 NA 4
## 5 no 100 NA 5
## 6 no 100 NA 6
tail(data)
## Block Old_Label Label Population
## 571 Block5 B5 - Backup Fam H 8/25 BE B5 | G6 Med 19 Med19
## 572 Block5 B5 - Backup Fam I 8/25 BE B5 | G6 Med 20 Med20
## 573 Block5 B5-Backup Fam A 8/25 BE B5 | G6 Med 21 Med21
## 574 Block5 B5-Backup Fam B 8/25 BE B5 | G6 Med 22 Med22
## 575 Block5 B5-Backup Fam C 8/25 BE B5 | G6 Med 23 Med23
## 576 Block5 B5-Backup Fam D 8/25 BE B5 | G6 Med 24 Med24
## Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 571 Medium 5 6 8/25/21
## 572 Medium 5 6 8/25/21
## 573 Medium 5 6 8/25/21
## 574 Medium 5 6 8/25/21
## 575 Medium 5 6 8/25/21
## 576 Medium 5 6 8/25/21
## Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2 MC_Box1 MC_Box2
## 571 8/25/21 8/26/21 DSC_0273 DSC_0274 2.00000 2.00000
## 572 8/25/21 8/26/21 <NA> <NA> NA NA
## 573 8/25/21 8/26/21 DSC_0277 DSC_0278 29.76978 32.92378
## 574 8/25/21 8/26/21 <NA> <NA> NA NA
## 575 8/25/21 8/26/21 DSC_0267 DSC_0268 25.31021 46.10092
## 576 8/25/21 8/26/21 DSC_0283 DSC_0284 71.09684 55.37443
## MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 571 4.0 2 2 4 8 0.500000
## 572 0.0 NA NA NA 0 NA
## 573 62.7 23 23 46 35 1.314286
## 574 0.0 NA NA NA NA NA
## 575 71.4 20 36 56 40 1.400000
## 576 126.5 76 57 133 41 3.243902
## First_Throw Extinction_finalstatus Less_than_5 Nb_adults Lambda obs
## 571 no no yes 4 0.500000 499
## 572 extinct yes yes NA NA 500
## 573 no no no 46 1.314286 501
## 574 extinct yes yes NA NA 502
## 575 no no no 56 1.400000 503
## 576 no no no 133 3.243902 504
dim(data)
## [1] 504 26
#Add variable
data$gen_square <- data$Generation_Eggs^2
##Color code
# values = c("#685338", "#FEA698", "#7BC7AD")) +
# values = c( "#51392C","#DA5837", "#4C9E97")) +
# values = c( "#00AFBB", "#E7B800", "#FC4E07")) +
# values = c("#9AD3CA", "#929FD1", "#E7C7D7")) +
#values = c("#227C9D", "#FFCB77", "#FE6D73")) +
Heterozygosity remain
over time
#Upload He dataset
data_he <- read.table(file=here::here("data", "He_ReMaining_BeatricePop.csv"), sep=",", header=TRUE)
data_he$Population <- as.factor(data_he$Population)
data_he <- data_he[,c(1,3)]
#Merge dataset: add heterozygosity data to oridinal data
data <- merge(x = data, y = data_he, by = "Population", all.x=TRUE)
### Creation of 4 He:
# 1- He_start: He was remaining for each population when we started the experiment
# 2- He_lost_gen_t: He was lost only during this specfic generation during the experiment (considering He=1 before this generation)
# 3- He_remain: He was remaining after each generation (He_remain[Gen=6]=He_end)
# 4- He_exp: He was lost during the entire experiment (considering He=1 at the beginning of the exp)
# 5- He_end: He was remaining for each population when we finished the experiment (considering the real He at the beginning of the exp, He_start, not 1)
#####
##### 1- He_start
#####
colnames(data)[28] <- "He_start"
#####
##### 2- He_lost_gen_t
#####
#Add He lost each generation
data$He_lost_gen_t <- 1 - (1/(2*data$Nb_adults))
#To consider extinct populations
is.na(data$He_lost_gen_t) <- sapply (data$He_lost_gen_t, is.infinite)
#####
##### 3- He_remain
#####
#Create new variable He remain per generation
data$He_remain <- NA
#Initiate with gen=1: He_start * He_lost_gen1
data$He_remain[data$Generation_Eggs=="1"] <- data$He_start[data$Generation_Eggs=="1"]*
data$He_lost_gen_t[data$Generation_Eggs=="1"]
#Compute for all the other genrations, except the first one
for(i in levels(data$Population)){
ifelse(sum(data$Generation_Eggs[data$Population==i])!=21,print("ERROR"),print(i))
for(j in 2:max(data$Generation_Eggs)){
data$He_remain[data$Population==i&data$Generation_Eggs==j] <-
data$He_remain[data$Population==i&data$Generation_Eggs==j-1]*
data$He_lost_gen_t[data$Population==i&data$Generation_Eggs==j]
}
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low16"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low27"
## [1] "Low28"
## [1] "Low29"
## [1] "Low3"
## [1] "Low30"
## [1] "Low31"
## [1] "Low32"
## [1] "Low33"
## [1] "Low34"
## [1] "Low35"
## [1] "Low36"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med14"
## [1] "Med15"
## [1] "Med17"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med20"
## [1] "Med21"
## [1] "Med22"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med5"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
#####
##### 4- He_exp
#####
# He at the end of the experiment
data$He_exp <- NA
#Compute the HE at the end of the experiment
for(i in levels(data$Population)){
temp_data_pop <- subset(data, Population==i)
ifelse(sum(temp_data_pop$Generation_Eggs)!=21,print("ERROR"),print(i))
temp_he_remaining_gen <- 1 - (1/(2*temp_data_pop$Nb_adults))
#To consider extinct add this row:
is.na(temp_he_remaining_gen) <- sapply (temp_he_remaining_gen, is.infinite)
#Compute for the whole experiment
temp_he_remain_exp <- prod(temp_he_remaining_gen, na.rm = TRUE)
data$He_exp[data$Population==i] <- temp_he_remain_exp
rm(temp_he_remain_exp,temp_he_remaining_gen,temp_data_pop)
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low16"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low27"
## [1] "Low28"
## [1] "Low29"
## [1] "Low3"
## [1] "Low30"
## [1] "Low31"
## [1] "Low32"
## [1] "Low33"
## [1] "Low34"
## [1] "Low35"
## [1] "Low36"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med14"
## [1] "Med15"
## [1] "Med17"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med20"
## [1] "Med21"
## [1] "Med22"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med5"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
#####
##### 5- He_end
#####
# Remaining He at the end of the experiment
data$He_end <- data$He_start * data$He_exp
#Save these values for Phenotyping
data_he <- merge(x = data_he,
y = data[data$Generation_Eggs==1,
c("Population","He_start",
"He_exp","He_end")],
by = "Population", all.x=TRUE)
Phenotyping
dataset
#Upload growth rate end of experiment
data_phenotyping <- read.table(file=here::here("data", "Adaptation_Data.csv"), sep=",", header=TRUE)
#Factors variables
data_phenotyping$ID_Rep <- as.factor(data_phenotyping$ID_Rep)
data_phenotyping$Week <- as.factor(data_phenotyping$Week)
data_phenotyping$Block <- as.factor(data_phenotyping$Block)
data_phenotyping$Population <- as.factor(data_phenotyping$Population)
#Revalue
data_phenotyping$Genetic_Diversity <- as.factor(data_phenotyping$Divsersity)
data_phenotyping$Genetic_Diversity <- plyr::revalue(data_phenotyping$Genetic_Diversity, c("Med"="Medium"))
data_phenotyping$Genetic_Diversity <- factor(data_phenotyping$Genetic_Diversity,
levels = c("High", "Medium", "Low"))
#Add sum total
data_phenotyping$Nb_ttx <- rowSums(data_phenotyping[,11:13], na.rm=TRUE)
#Proportion
data_phenotyping$p_adults <- data_phenotyping$Adults / data_phenotyping$Nb_ttx
data_phenotyping$p_pupae <- data_phenotyping$Pupae / data_phenotyping$Nb_ttx
data_phenotyping$p_larvae <- data_phenotyping$Larvae / data_phenotyping$Nb_ttx
data_phenotyping_4week <- data_phenotyping[data_phenotyping$Week=="4-week",]
data_phenotyping <- data_phenotyping[data_phenotyping$Week=="5-week",]
#Merge with He dataset
data_phenotyping <- merge(x = data_phenotyping,
y = data_he, by = "Population", all.x=TRUE)
data_phenotyping_4week <- merge(x = data_phenotyping_4week,
y = data_he, by = "Population", all.x=TRUE)
#Clean if less than 30 parents
pop_possible<-unique(data$Population[data$Generation_Eggs==5&data$Nb_adults>=30])
data_phenotyping <- data_phenotyping[data_phenotyping$Population %in% pop_possible,]
pop_possible<-unique(data$Population[data$Generation_Eggs==4&data$Nb_adults>=30])
data_phenotyping_4week <- data_phenotyping_4week[data_phenotyping_4week$Population %in% pop_possible,]
data_phenotyping_all <- rbind(data_phenotyping,data_phenotyping_4week)
Old He remain
# #Upload growth rate end of experiment
# data_he <- read.table(file=here::here("data", "He_ReMaining_BeatricePop.csv"), sep=",", header=TRUE)
# data_he$Population <- as.factor(data_he$Population)
# data_he <- data_he[,c(1,3)]
#
#
# ### Additional: He remaining
# # New vector for the lost during exp
# data_he$He_remain_exp <- NA
#
# # He lost during exp
# for(i in levels(data$Population)){
# temp_data_pop <- subset(data, Population==i)
# ifelse(sum(temp_data_pop$Generation_Parents)!=15,print("ERROR"),print(i))
# temp_he_remaining_gen <- 1 - (1/(2*temp_data_pop$Nb_parents))
#
# #To consider extinct add this row:
# is.na(temp_he_remaining_gen) <- sapply (temp_he_remaining_gen, is.infinite)
#
# temp_he_remain_exp <- prod(temp_he_remaining_gen, na.rm = TRUE)
# data_he$He_remain_exp[data_he$Population==i] <- temp_he_remain_exp
# rm(temp_he_remain_exp,temp_he_remaining_gen,temp_data_pop)
# }
#
# # Remaining He at the end of the experiment
# data_he$He_end <- data_he$He_remain * data_he$He_remain_exp
#
# # Remove kill population
# data_he <- data_he[!is.na(data_he$He_remain_exp),]
# data_he <- droplevels(data_he)
#
# ## Merge dataset He
# data_old_merged <- merge(x = data, y = data_he, by = "Population", all.x=TRUE)
Survival data
data_survival <- data[data$Generation_Parents==0,c("Population","Block","Genetic_Diversity")]
data_survival$Gen_eggs_extinct <- max(data$Generation_Eggs)
data_survival$Survival <- "yes"
#Format data
for (i in unique(data_survival$Population)) {
if (data$Extinction_finalstatus[data$Population==i&data$Generation_Eggs==1] == "yes" ) {
data_survival$Gen_eggs_extinct[data_survival$Population==i]<-
min(data$Generation_Eggs[data$Population==i&data$First_Throw=="extinct"])
data_survival$Survival[data_survival$Population==i] <- "extinct"
}else{
print(i)
}
}
## [1] "High1"
## [1] "High13"
## [1] "High14"
## [1] "High15"
## [1] "High16"
## [1] "High17"
## [1] "High18"
## [1] "High19"
## [1] "High2"
## [1] "High20"
## [1] "High21"
## [1] "High22"
## [1] "High23"
## [1] "High24"
## [1] "High25"
## [1] "High26"
## [1] "High27"
## [1] "High28"
## [1] "High3"
## [1] "High32"
## [1] "High33"
## [1] "High34"
## [1] "High35"
## [1] "High4"
## [1] "High5"
## [1] "High6"
## [1] "High7"
## [1] "Low1"
## [1] "Low10"
## [1] "Low11"
## [1] "Low12"
## [1] "Low13"
## [1] "Low14"
## [1] "Low15"
## [1] "Low17"
## [1] "Low18"
## [1] "Low19"
## [1] "Low2"
## [1] "Low20"
## [1] "Low21"
## [1] "Low22"
## [1] "Low24"
## [1] "Low25"
## [1] "Low26"
## [1] "Low29"
## [1] "Low3"
## [1] "Low34"
## [1] "Low35"
## [1] "Low4"
## [1] "Low5"
## [1] "Low7"
## [1] "Low8"
## [1] "Low9"
## [1] "Med1"
## [1] "Med10"
## [1] "Med11"
## [1] "Med12"
## [1] "Med13"
## [1] "Med15"
## [1] "Med18"
## [1] "Med19"
## [1] "Med2"
## [1] "Med21"
## [1] "Med23"
## [1] "Med24"
## [1] "Med3"
## [1] "Med4"
## [1] "Med6"
## [1] "Med7"
## [1] "Med8"
## [1] "Med9"
str(data_survival)
## 'data.frame': 84 obs. of 5 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
## $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Gen_eggs_extinct : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Survival : chr "yes" "yes" "yes" "yes" ...
data_survival$Survival <- as.factor(data_survival$Survival)
#Note: the Surv() function in the {survival} package accepts by default TRUE/FALSE, where TRUE is event and FALSE is censored; 1/0 where 1 is event and 0 is censored; or 2/1 where 2 is event and 1 is censored. Please take care to ensure the event indicator is properly formatted.
data_survival$status <- ifelse(data_survival$Survival=="extinct", 1, 0)
PLOT
Population size
PLOT_Pop_size<-ggplot2::ggplot(data, aes(x = factor(Generation_Eggs),
y = Nb_adults,
group = Population,
colour = Genetic_Diversity)) +
geom_point(position = position_dodge(0.4), size = 0.8, alpha = 0.7) +
geom_line(position = position_dodge(0.4), size = 0.2, alpha = 0.6) +
ylab("Population size") +
labs(color="Genetic diversity") +
xlab("Generation") +
theme_LO
PLOT_Pop_size

SUM_Popsize <- Rmisc::summarySE(data,
measurevar = c("Nb_adults"),
groupvars = c("Generation_Eggs",
"Genetic_Diversity"),
na.rm=TRUE)
SUM_Popsize
## Generation_Eggs Genetic_Diversity N Nb_adults sd se ci
## 1 1 High 27 100.00000 0.00000 0.000000 0.00000
## 2 1 Medium 23 100.00000 0.00000 0.000000 0.00000
## 3 1 Low 34 100.00000 0.00000 0.000000 0.00000
## 4 2 High 27 344.29630 73.77294 14.197610 29.18360
## 5 2 Medium 23 282.04348 100.75735 21.009360 43.57075
## 6 2 Low 34 282.61765 78.53006 13.467794 27.40043
## 7 3 High 27 151.85185 80.96899 15.582489 32.03027
## 8 3 Medium 23 118.73913 88.54491 18.462891 38.28969
## 9 3 Low 34 104.61765 85.13341 14.600260 29.70445
## 10 4 High 26 114.00000 80.20224 15.728954 32.39439
## 11 4 Medium 23 62.65217 64.50483 13.450188 27.89398
## 12 4 Low 34 46.38235 39.63241 6.796903 13.82840
## 13 5 High 27 103.62963 51.56486 9.923661 20.39838
## 14 5 Medium 21 58.57143 51.11122 11.153383 23.26555
## 15 5 Low 31 51.51613 46.13377 8.285870 16.92200
## 16 6 High 27 147.66667 42.60282 8.198916 16.85311
## 17 6 Medium 19 69.73684 46.02396 10.558620 22.18284
## 18 6 Low 28 77.03571 46.14428 8.720450 17.89289
# SUM_Popsize <- Rmisc::summarySE(data[data$Extinction_finalstatus=="no",],
# measurevar = c("Nb_adults"),
# groupvars = c("Generation_Eggs",
# "Genetic_Diversity"),
# na.rm=TRUE)
# SUM_Popsize
PLOT_Pop_size_average <- PLOT_Pop_size +
geom_point(data = SUM_Popsize, aes(x = factor(Generation_Eggs),
y = Nb_adults,
group = Genetic_Diversity,
colour = Genetic_Diversity),
size = 5, position = position_dodge(0.4)) +
geom_errorbar(data = SUM_Popsize, aes(x = factor(Generation_Eggs),
ymin = Nb_adults-ci, ymax = Nb_adults+ci,
group = Genetic_Diversity,
colour = Genetic_Diversity),
size = 1, width=.2, position = position_dodge(0.4)) +
geom_line(data = SUM_Popsize, aes(x = factor(Generation_Eggs),
y = Nb_adults,
group = Genetic_Diversity,
colour = Genetic_Diversity),
linetype = "dashed", size = 1.5, position = position_dodge(0.4))
PLOT_Pop_size_average

# cowplot::save_plot(file = here::here("figures","PopulationSize_average.pdf"), PLOT_Pop_size_average, base_height = 10/cm(1), base_width = 15/cm(1), dpi = 610)
#Prediction with models
# Test effect
# data$gen_square <- data$Generation_Eggs^2
mod1 <- lme4::glmer(Nb_adults~Generation_Eggs*Genetic_Diversity +
gen_square*Genetic_Diversity +
Block + (1|obs),
data = data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",],
family = "poisson")
#Predict data
filldata <- data.frame(Genetic_Diversity = rep(levels(data$Genetic_Diversity),100),
Generation_Eggs = rep(seq(2,6, length = 100),each=3),
Block = "Block4",
gen_square = (rep(seq(2,6, length = 100),each=3))^2,
Estimates = NA)
filldata$Estimates <- predict(mod1, newdata=filldata, type = "response", re.form=~0)
#Predcit estimate minimun values
tapply(filldata$Estimates, filldata$Genetic_Diversity, min)
## High Low Medium
## 85.85198 44.16308 51.99642
filldata[filldata$Estimates>=44.16&filldata$Estimates<=44.17,]
## Genetic_Diversity Generation_Eggs Block gen_square Estimates
## 198 Low 4.626263 Block4 21.40231 44.16308
filldata[filldata$Estimates>=51.99&filldata$Estimates<=52.00,]
## Genetic_Diversity Generation_Eggs Block gen_square Estimates
## 230 Medium 5.070707 Block4 25.71207 51.99642
filldata[filldata$Estimates>=85.85&filldata$Estimates<=85.86,]
## Genetic_Diversity Generation_Eggs Block gen_square Estimates
## 181 High 4.424242 Block4 19.57392 85.85198
## Change name vector
vector_names <- c(`Low` = "Strong bottleneck",
`Medium` = "Intermediate bottleneck",
`High` = "Diverse")
PLOT_Pop_size_predict<-ggplot2::ggplot(data[data$Extinction_finalstatus=="no",],
aes(x = factor(Generation_Eggs),
y = Nb_adults)) +
geom_point(aes(group = Population,
colour = Genetic_Diversity),
position = position_dodge(0.4), size = 0.7, alpha = 0.5) +
geom_line(aes(group = Population,
colour = Genetic_Diversity),
position = position_dodge(0.4), size = 0.05, alpha = 0.5) +
geom_line(data = filldata, aes(x = Generation_Eggs,
y = Estimates,
colour = Genetic_Diversity), size = 1.4) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
ylab("Population size") +
xlab("Generation") +
theme_LO
PLOT_Pop_size_predict

#
# cowplot::save_plot(file = here::here("figures","PopulationSize_predict.pdf"), PLOT_Pop_size_predict, base_height = 10/cm(1), base_width = 15/cm(1), dpi = 610)
#
#
Extinction
#Percent of extinction
length(unique(data$Population[data$Genetic_Diversity=="Low"&
data$Extinction_finalstatus=="yes"]))/
length(unique(data$Population[data$Genetic_Diversity=="Low"]))
## [1] 0.2352941
length(unique(data$Population[data$Genetic_Diversity=="Medium"&
data$Extinction_finalstatus=="yes"]))/
length(unique(data$Population[data$Genetic_Diversity=="Medium"]))
## [1] 0.2173913
length(unique(data$Population[data$Genetic_Diversity=="High"&
data$Extinction_finalstatus=="yes"]))/
length(unique(data$Population[data$Genetic_Diversity=="High"]))
## [1] 0
#Create data with percent
vector_names <- c(`Low` = "Strong bottleneck",
`Medium` = "Intermediate bottleneck",
`High` = "Diverse")
f_labels = data.frame(Genetic_Diversity = c("High","Medium","Low"),
label = c("Extinction = 0 %", "Extinction = 21.7 %", "Extinction = 23.5 %"))
f_labels$Genetic_Diversity <- factor(f_labels$Genetic_Diversity,
levels = c("High", "Medium", "Low"))
## Extinction yes no
PLOT_Extinction<-ggplot2::ggplot(data, aes(x = factor(Generation_Eggs),
y = Nb_adults)) +
facet_wrap(~ Genetic_Diversity, ncol=3, labeller = ggplot2::as_labeller(vector_names)) +
#geom_point(position = position_dodge(0.4), size = 0.4, alpha = 0.7) +
geom_line(aes(group = Population,
colour = Extinction_finalstatus),
position = position_dodge(0.1), size = 0.4, alpha = 0.8) +
geom_text(x = 5, y = 420,
aes(label = label),
data = f_labels,
col="red",
size = 3,
vjust = 0) +
scale_color_manual(values = c("black", "red")) +
ylab("Population size") +
xlab("Generation") +
theme_LO +
theme(strip.background = element_rect(color="grey30",
fill="white", size=0.5, linetype="solid"),
strip.text.x = element_text(size = 10, color = "black", face = "bold"),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
legend.position = "none")
PLOT_Extinction

#
#
# cowplot::save_plot(here::here("figures","Fig2_Extinction.pdf"), PLOT_Extinction, base_height = 8/cm(1),
# base_width = 20/cm(1), dpi = 610)
#
#
PLOT_Extinction

Growth rate G2
tapply(data[data$Generation_Eggs==2,]$Lambda,data[data$Generation_Eggs==2,]$Genetic_Diversity,mean)
## High Medium Low
## 3.442963 2.820435 2.826176
PLOT_Growth<-ggplot2::ggplot(data[data$Generation_Eggs==2,], aes(x = Genetic_Diversity, y = Lambda,
colour = Genetic_Diversity)) +
geom_boxplot(aes(group = Genetic_Diversity),outlier.colour = NA) +
geom_jitter(width = 0.25, size = 1, alpha = 0.8) +
ylab("Growth rate") +
theme(legend.position = "none") +
xlab("Genetic diversity") +
theme_LO
PLOT_Growth

#
# cowplot::save_plot(here::here("figures","OldPlot_Growth_Start.pdf"), PLOT_Growth, base_height = 10/cm(1),
# base_width = 8/cm(1), dpi = 610)
Life stage
proportion
SUM_Prop_adults<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_adults"),
groupvars = c("Genetic_Diversity","Week"),
na.rm = TRUE)
SUM_Prop_adults
## Genetic_Diversity Week N p_adults sd se ci
## 1 High 4-week 40 0.6315807 0.18084723 0.02859446 0.05783775
## 2 High 5-week 94 0.9646852 0.05234098 0.00539856 0.01072047
## 3 Medium 4-week 18 0.7261684 0.16721535 0.03941304 0.08315424
## 4 Medium 5-week 38 0.9594668 0.06046606 0.00980889 0.01987470
## 5 Low 4-week 27 0.6649974 0.19731065 0.03797245 0.07805350
## 6 Low 5-week 52 0.9538157 0.08241700 0.01142918 0.02294504
SUM_Prop_pupae<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_pupae"),
groupvars = c("Genetic_Diversity","Week"),
na.rm = TRUE)
SUM_Prop_pupae
## Genetic_Diversity Week N p_pupae sd se ci
## 1 High 4-week 40 0.31153108 0.15526911 0.024550202 0.049657471
## 2 High 5-week 94 0.01417884 0.02356334 0.002430373 0.004826238
## 3 Medium 4-week 18 0.23410139 0.15149574 0.035707888 0.075337059
## 4 Medium 5-week 38 0.01931921 0.02904804 0.004712215 0.009547854
## 5 Low 4-week 27 0.28241476 0.17824104 0.034302504 0.070509807
## 6 Low 5-week 52 0.01796037 0.02546214 0.003530964 0.007088705
SUM_Prop_larvae<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_larvae"),
groupvars = c("Genetic_Diversity","Week"),
na.rm = TRUE)
SUM_Prop_larvae
## Genetic_Diversity Week N p_larvae sd se ci
## 1 High 4-week 40 0.05688822 0.04383041 0.006930197 0.014017646
## 2 High 5-week 94 0.02113595 0.04275336 0.004409672 0.008756736
## 3 Medium 4-week 18 0.03973025 0.03784416 0.008919954 0.018819458
## 4 Medium 5-week 38 0.02121399 0.04478738 0.007265472 0.014721244
## 5 Low 4-week 27 0.05258789 0.08541162 0.016437473 0.033787710
## 6 Low 5-week 52 0.02822390 0.07073457 0.009809120 0.019692631
SUM_Prop <- data.frame(SUM_Prop_adults[,1:4],
p_pupae=SUM_Prop_pupae$p_pupae,
p_larvae=SUM_Prop_larvae$p_larvae)
SUM_Prop<-tidyr::gather(SUM_Prop,"Stage", "Proportion",4:6,-"Genetic_Diversity")
SUM_Prop
## Genetic_Diversity Week N Stage Proportion
## 1 High 4-week 40 p_adults 0.63158069
## 2 High 5-week 94 p_adults 0.96468521
## 3 Medium 4-week 18 p_adults 0.72616835
## 4 Medium 5-week 38 p_adults 0.95946680
## 5 Low 4-week 27 p_adults 0.66499735
## 6 Low 5-week 52 p_adults 0.95381574
## 7 High 4-week 40 p_pupae 0.31153108
## 8 High 5-week 94 p_pupae 0.01417884
## 9 Medium 4-week 18 p_pupae 0.23410139
## 10 Medium 5-week 38 p_pupae 0.01931921
## 11 Low 4-week 27 p_pupae 0.28241476
## 12 Low 5-week 52 p_pupae 0.01796037
## 13 High 4-week 40 p_larvae 0.05688822
## 14 High 5-week 94 p_larvae 0.02113595
## 15 Medium 4-week 18 p_larvae 0.03973025
## 16 Medium 5-week 38 p_larvae 0.02121399
## 17 Low 4-week 27 p_larvae 0.05258789
## 18 Low 5-week 52 p_larvae 0.02822390
SUM_Prop$Stage <- factor(SUM_Prop$Stage, levels = c("p_adults", "p_pupae", "p_larvae"))
# New facet label names for supp variable
supp.weeks <- c("4 week", "5 week")
names(supp.weeks) <- c("4-week", "5-week")
PLOT_prop <- ggplot2::ggplot(data = SUM_Prop, aes(x = Genetic_Diversity,
y = Proportion, fill = Stage)) +
facet_wrap(~ Week, ncol=2,
labeller = labeller(Week = supp.weeks)) +
geom_bar(stat="identity") +
xlab("Demographic history") +
labs(color="Stage of individuals") +
ylab("Proportion of each stage") +
scale_fill_manual(values= c("#619CFF","#F8766D","#00BA38"),
breaks = c("p_adults", "p_pupae", "p_larvae"),
labels = c( "Adult", "Pupae","Larvae")) +
ggplot2::scale_x_discrete(breaks=c("High", "Medium", "Low"),
labels=c("Diverse",
"Intermediate\nbottleneck",
"Strong \nbottleneck")) +
theme_LO +
theme(strip.background = element_rect(color="grey30",
fill="white", size=0.5, linetype="solid"),
strip.text.x = element_text(size = 12, color = "black", face = "bold"),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
PLOT_prop

#
# cowplot::save_plot(here::here("figures","FigS2_Life_stage_proportion.pdf"), PLOT_prop,
# base_height = 10/cm(1), base_width = 18/cm(1), dpi = 610)
#
#
Growth rate and
He
# Remove pseudoreplication: average of each population
SUM_POP_He_Mean<- Rmisc::summarySE(data_phenotyping,
measurevar = c("Lambda"),
groupvars = c("Genetic_Diversity",
"He_end",
"Population"),
na.rm=TRUE)
PLOT_He_Final <- ggplot2::ggplot(SUM_POP_He_Mean, aes(x = He_end,
y = Lambda,
colour = Genetic_Diversity)) +
geom_point(size = 2, alpha = 0.8) +
geom_errorbar(data = SUM_POP_He_Mean, aes(x = He_end,
ymin = Lambda-se, ymax = Lambda+se,
colour = Genetic_Diversity),
size = 0.6, width=0, alpha = 0.8) +
geom_point(size = 3, alpha = 0.8) +
ylab("Growth rate") +
xlab("Expected heterozygosity") +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07","#E7B800")) +
labs(size="Replicates") +
theme_LO
PLOT_He_Final

#
#
# cowplot::save_plot(here::here("figures","Heterozygosity_Growthrate.pdf"),
# PLOT_He_Final,
# base_height = 12/cm(1), base_width = 16/cm(1), dpi = 610)
#
#
# # ALL WITHOUT PSEUDO
# SUM_POP_He_ALL<- Rmisc::summarySE(data_evolution_Lambda,
# measurevar = c("Lambda"),
# groupvars = c("Genetic_Diversity","Phenotyping", "He_remain", "He_end",
# "Population"),
# na.rm=TRUE)
#
# SUM_POP_He_ALL$HE <- ifelse(SUM_POP_He_ALL$Phenotyping=="Initial",SUM_POP_He_ALL$He_remain,SUM_POP_He_ALL$He_end)
#
# SUM_POP_He_ALL <- SUM_POP_He_ALL[!is.na(SUM_POP_He_ALL$He_end),]
# SUM_POP_He_ALL <- SUM_POP_He_ALL[SUM_POP_He_ALL$HE!="-Inf",]
#
#
# facet_names <- c('Initial'="Before rescue experiment",'Final'="After rescue experiment")
#
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda,
# colour = Genetic_Diversity,
# shape = Phenotyping)) +
# #facet_wrap(~ Phenotyping, ncol=1) +
# geom_point(size = 3, alpha = 0.8) +
# scale_shape_manual(values = c(1, 19)) +
# ylab("Growth rate") +
# xlab("Remaining heterozygosity") +
# theme_LO
# PLOT_He_ALL
#
#
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda,
# colour = Genetic_Diversity,
# shape = Phenotyping)) +
# facet_wrap(~ Phenotyping, ncol=1) +
# geom_point(size = 3, alpha = 0.8) +
# scale_shape_manual(values = c(1, 19)) +
# ylab("Growth rate") +
# xlab("Remaining heterozygosity") +
# theme_LO
# PLOT_He_ALL
#
#
# PLOT_He_ALL <- ggplot2::ggplot(SUM_POP_He_ALL, aes(x = HE, y = Lambda,
# colour = Genetic_Diversity)) +
# facet_wrap(~ Phenotyping, ncol=1) +
# geom_point(size = 3, alpha = 0.8) +
# ylab("Growth rate") +
# xlab("Remaining heterozygosity") +
# theme_LO
# PLOT_He_ALL
Lifestage and He
SUM_Prop_adults<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_adults"),
groupvars = c("Genetic_Diversity","Week", "Population", "He_end"),
na.rm = TRUE)
colnames(SUM_Prop_adults)[6]<-"prop"
SUM_Prop_adults$lifestage <- "adults"
SUM_Prop_pupae<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_pupae"),
groupvars = c("Genetic_Diversity","Week", "Population", "He_end"),
na.rm = TRUE)
colnames(SUM_Prop_pupae)[6]<-"prop"
SUM_Prop_pupae$lifestage <- "pupae"
SUM_Prop_larvae<- Rmisc::summarySE(data_phenotyping_all,
measurevar = c("p_larvae"),
groupvars = c("Genetic_Diversity","Week",
"Population", "He_end"),
na.rm = TRUE)
colnames(SUM_Prop_larvae)[6]<-"prop"
SUM_Prop_larvae$lifestage <- "larvae"
SUM_Prop_POP <- rbind(SUM_Prop_adults, SUM_Prop_pupae, SUM_Prop_larvae)
PLOT_Proportion_lifestage <- ggplot2::ggplot(SUM_Prop_POP, aes(x = He_end,
y = prop,
colour = Genetic_Diversity,
size = N)) +
facet_grid( Week ~ factor(lifestage, levels=c('larvae','pupae','adults'))) +
geom_point(alpha = 0.8) +
ylab("Proportion") +
xlab("Expected heterozygosity") +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
labs(size="Replicates") +
theme_LO + theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
PLOT_Proportion_lifestage

#
# cowplot::save_plot(here::here("figures","Life_stage_proportion_He.pdf"),
# PLOT_Proportion_lifestage,
# base_height = 14/cm(1), base_width = 36/cm(1), dpi = 610)
#
#
He over time
#Compute for all the other genrations, except the first one
data$ID_extinction <- "extant"
for(i in unique(data$Population[data$Extinction_finalstatus=="yes"])){
gen_all <- data$Generation_Eggs[data$Population==i&!is.na(data$He_remain)]
maxgen <- max(gen_all)
data$ID_extinction[data$Population==i&data$Generation_Eggs==maxgen] <- "willextinct"
}
data[data$ID_extinction=="willextinct",]
## Population Block Old_Label Label
## 205 Low16 Block4 B4-O1 7/14 BE B4 | G5 Low 16
## 273 Low27 Block5 B5-B1 6/16 BE B5 | G4 Low 27
## 278 Low28 Block5 B5_E1 5/12 BE B5 | G3 Low 28
## 299 Low30 Block5 B5-D1 6/16 BE B5 | G4 Low 30
## 303 Low31 Block5 B(2)5-P1 6/16 BE B5 | G4 Low 31
## 310 Low32 Block5 B(2)5_Q1 5/12 BE B5 | G3 Low 32
## 317 Low33 Block5 B(2)5-T1 7/21 BE B5 | G5 Low 33
## 336 Low36 Block5 B(2)5-X1 6/16 BE B5 | G4 Low 36
## 402 Med14 Block4 B4-Backup Fam L 6/9 BE B4 | G4 Med 14
## 409 Med17 Block5 B5_Backup_Fam_F 5/12 BE B5 | G3 Med 17
## 437 Med20 Block5 B5 - Backup Fam I 6/16 BE B5 | G4 Med 20
## 449 Med22 Block5 B5_Backup_Fam_B 5/12 BE B5 | G3 Med 22
## 479 Med5 Block3 B3 - Backup Fam E 7/7 BE B3 | G5 Med 5
## Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 205 Low 4 5 7/14/21
## 273 Low 3 4 6/16/21
## 278 Low 2 3 5/12/21
## 299 Low 3 4 6/16/21
## 303 Low 3 4 6/16/21
## 310 Low 2 3 5/12/21
## 317 Low 4 5 7/21/21
## 336 Low 3 4 6/16/21
## 402 Medium 3 4 6/9/21
## 409 Medium 2 3 5/12/21
## 437 Medium 3 4 6/16/21
## 449 Medium 2 3 5/12/21
## 479 Medium 4 5 7/7/21
## Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2 MC_Box1 MC_Box2
## 205 7/14/21 7/15/21 <NA> DSC_0994 0.000000 2.000000
## 273 6/16/21 6/17/21 DSC_0526 DSC_0527 5.090878 0.000000
## 278 5/12/21 5/13/21 DSC_0918 DSC_0919 3.085768 3.638847
## 299 6/16/21 6/17/21 DSC_0559 <NA> 1.383920 0.000000
## 303 6/16/21 6/17/21 <NA> DSC_0541 0.000000 1.000000
## 310 5/12/21 5/13/21 DSC_0953 DSC_0954 88.254170 70.952124
## 317 7/21/21 7/22/21 DSC_0117 DSC_0118 1.000000 1.000000
## 336 6/16/21 6/17/21 DSC_0540 <NA> 1.000000 0.000000
## 402 6/9/21 6/10/21 DSC_0376 DSC_0377 10.974990 1.000000
## 409 5/12/21 5/13/21 DSC_0916 DSC_0917 1.543122 1.084327
## 437 6/16/21 6/17/21 DSC_0542 <NA> 1.000000 0.000000
## 449 5/12/21 5/13/21 DSC_0922 DSC_0923 9.168447 6.251069
## 479 7/7/21 7/8/21 <NA> DSC_0830 NA NA
## MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 205 2.0 0 2 2 5 0.400000000
## 273 5.1 3 0 3 19 0.157894737
## 278 6.7 1 1 2 374 0.005347594
## 299 1.4 1 0 1 146 0.006849315
## 303 1.0 1 1 2 272 0.007352941
## 310 159.2 71 56 127 276 0.460144928
## 317 2.0 1 1 2 3 0.666666667
## 336 1.0 1 0 1 22 0.045454545
## 402 12.0 7 1 8 138 0.057971014
## 409 2.6 3 2 5 416 0.012019231
## 437 1.0 1 0 1 20 0.050000000
## 449 15.4 10 6 16 200 0.080000000
## 479 0.0 NA NA 1 11 0.090909091
## First_Throw Extinction_finalstatus Less_than_5 Nb_adults Lambda obs
## 205 no yes yes 2 0.400000000 378
## 273 no yes yes 3 0.157894737 319
## 278 no yes yes 2 0.005347594 236
## 299 no yes yes 1 0.006849315 322
## 303 no yes yes 2 0.007352941 323
## 310 no yes yes 127 0.460144928 240
## 317 no yes yes 2 0.666666667 409
## 336 no yes yes 1 0.045454545 328
## 402 no yes yes 8 0.057971014 308
## 409 no yes yes 5 0.012019231 245
## 437 no yes yes 1 0.050000000 332
## 449 no yes yes 16 0.080000000 250
## 479 no yes yes 1 0.090909091 359
## gen_square He_start He_lost_gen_t He_remain He_exp He_end
## 205 25 0.5544299 0.7500000 0.3575672 0.6449276 0.3575672
## 273 16 0.5367998 0.8333333 0.4324691 0.8056432 0.4324691
## 278 9 0.5386585 0.7500000 0.4014365 0.7452523 0.4014365
## 299 16 0.5540444 0.5000000 0.2742819 0.4950540 0.2742819
## 303 16 0.5532775 0.7500000 0.4116634 0.7440450 0.4116634
## 310 9 0.5528880 0.9960630 0.5469651 0.9892872 0.5469651
## 317 25 0.5523333 0.7500000 0.3359893 0.6083089 0.3359893
## 336 16 0.5427371 0.5000000 0.2635269 0.4855518 0.2635269
## 402 16 0.6802331 0.9375000 0.6315974 0.9285014 0.6315974
## 409 9 0.6825509 0.9000000 0.6104897 0.8944237 0.6104897
## 437 16 0.6819650 0.5000000 0.3302071 0.4841994 0.3302071
## 449 9 0.6809168 0.9687500 0.6546991 0.9614965 0.6546991
## 479 25 0.6807065 0.5000000 0.3209456 0.4714889 0.3209456
## ID_extinction
## 205 willextinct
## 273 willextinct
## 278 willextinct
## 299 willextinct
## 303 willextinct
## 310 willextinct
## 317 willextinct
## 336 willextinct
## 402 willextinct
## 409 willextinct
## 437 willextinct
## 449 willextinct
## 479 willextinct
dim(data[data$ID_extinction=="willextinct",])
## [1] 13 33
data[data$ID_extinction=="willextinct","Population"]
## [1] Low16 Low27 Low28 Low30 Low31 Low32 Low33 Low36 Med14 Med17 Med20 Med22
## [13] Med5
## 84 Levels: High1 High13 High14 High15 High16 High17 High18 High19 ... Med9
data$Population[data$Genetic_Diversity=="Medium"&data$Extinction_finalstatus=="yes"]
## [1] Med14 Med14 Med14 Med14 Med14 Med14 Med17 Med17 Med17 Med17 Med17 Med17
## [13] Med20 Med20 Med20 Med20 Med20 Med20 Med22 Med22 Med22 Med22 Med22 Med22
## [25] Med5 Med5 Med5 Med5 Med5 Med5
## 84 Levels: High1 High13 High14 High15 High16 High17 High18 High19 ... Med9
PLOT_He_extinction <-ggplot2::ggplot(data, aes(x = factor(Generation_Eggs),
y = He_remain,
group = Population,
colour = Extinction_finalstatus,
shape = ID_extinction)) +
geom_line(position = position_dodge(0.5),
size = 0.25, alpha = 0.6) +
geom_point(position = position_dodge(0.5), size = 3, stroke = 0.6,
alpha = 0.8) +
ylab("Expected heterozygozity") +
scale_color_manual(values = c("black", "red")) +
scale_shape_manual(values = c(21, 13), guide=FALSE) +
labs(color="Extinct populations") +
guides(color = guide_legend(
override.aes=list(shape = 21, fill="white"))) +
xlab("Generation") +
theme_LO
PLOT_He_extinction

# #
# cowplot::save_plot(here::here("figures","FigS1_He_over_time.pdf"),
# PLOT_He_extinction,
# base_height = 12/cm(1), base_width = 14/cm(1), dpi = 610)
#
tapply(data$He_remain[data$Generation_Eggs==1], data$Genetic_Diversity[data$Generation_Eggs==1], mean)
## High Medium Low
## 0.9936078 0.6757290 0.5414677
ANALYSIS
Survival
analysis
Basic model
library(dplyr)
################################# SURVIVAL ANALYSIS
str(data_survival)
## 'data.frame': 84 obs. of 6 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
## $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Gen_eggs_extinct : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Survival : Factor w/ 2 levels "extinct","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ status : num 0 0 0 0 0 0 0 0 0 0 ...
#Note: the Surv() function in the {survival} package accepts by default TRUE/FALSE, where TRUE is event and FALSE is censored; 1/0 whejere 1 is event and 0 is censored; or 2/1 where 2 is event and 1 is censored. Please take care to ensure the event indicator is properly formatted.
# The Surv() function from the {survival} package creates a survival object for use as the response in a model formula. There will be one entry for each subject that is the survival time, which is followed by a + if the subject was censored. Let’s look at the first 10 observations:
survival::Surv(data_survival$Gen_eggs_extinct, data_survival$status)
## [1] 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+
## [26] 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 5 4 6+ 6+ 5
## [51] 5 4 6 6+ 6+ 5 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 6+ 5 6+ 4 6+ 6+ 6+ 5 6+ 4
## [76] 6+ 6+ 6+ 6+ 6 6+ 6+ 6+ 6+
#Surv(data_survival_all$Gen_eggs_extinct, data_survival$status)
# The survfit() function creates survival curves using the Kaplan-Meier method based on a formula. Let’s generate the overall survival curve for the entire cohort, assign it to object s1, and look at the structure using str():
s1 <- survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival)
str(s1)
## List of 16
## $ n : int 84
## $ time : num [1:3] 4 5 6
## $ n.risk : num [1:3] 84 80 74
## $ n.event : num [1:3] 4 6 3
## $ n.censor : num [1:3] 0 0 71
## $ surv : num [1:3] 0.952 0.881 0.845
## $ std.err : num [1:3] 0.0244 0.0401 0.0467
## $ cumhaz : num [1:3] 0.0476 0.1226 0.1632
## $ std.chaz : num [1:3] 0.0238 0.0388 0.0453
## $ type : chr "right"
## $ logse : logi TRUE
## $ conf.int : num 0.95
## $ conf.type: chr "log"
## $ lower : num [1:3] 0.908 0.814 0.771
## $ upper : num [1:3] 0.999 0.953 0.926
## $ call : language survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival)
## - attr(*, "class")= chr "survfit"
# time: the timepoints at which the curve has a step, i.e. at least one event occurred
# surv: the estimate of survival at the corresponding time
# The {ggsurvfit} package works best if you create the survfit object using the included ggsurvfit::survfit2() function, which uses the same syntax to what we saw previously with survival::survfit(). The ggsurvfit::survfit2() tracks the environment from the function call, which allows the plot to have better default values for labeling and p-value reporting.
#
####
#### PLOT
####
ggsurvfit::survfit2(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival) %>%
ggsurvfit::ggsurvfit() +
ggplot2::labs( x = "Generation",
y = "Overall survival probability") +
ggsurvfit::add_confidence_interval() +
ggsurvfit::add_risktable()

summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ 1, data = data_survival))
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## 1, data = data_survival)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 84 4 0.952 0.0232 0.908 0.999
## 5 80 6 0.881 0.0353 0.814 0.953
## 6 74 3 0.845 0.0395 0.771 0.926
Comparison across
groups
# library(lubridate)
# library("tidycmprsk")
# library("condSURV")
#########
######################################################
##### COMPARISONS ACROSS GROUPS
######################################################
# We can conduct between-group significance tests using a log-rank test. The log-rank test equally weights observations over the entire follow-up time and is the most common way to compare survival times between groups. There are versions that more heavily weight the early or late follow-up that could be more appropriate depending on the research question (see ?survdiff for different test options).
#
# We get the log-rank p-value using the survdiff function. For example, we can test whether there was a difference in survival time according to the demography history in the lung data:
#Comparison
survival::survdiff(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
## Call:
## survival::survdiff(formula = survival::Surv(Gen_eggs_extinct,
## status) ~ Genetic_Diversity, data = data_survival)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Genetic_Diversity=High 27 0 4.41 4.405 7.00
## Genetic_Diversity=Medium 23 5 3.44 0.707 1.01
## Genetic_Diversity=Low 34 8 5.15 1.571 2.73
##
## Chisq= 7 on 2 degrees of freedom, p= 0.03
summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival))
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity, data = data_survival)
##
## Genetic_Diversity=High
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genetic_Diversity=Medium
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 23 2 0.913 0.0588 0.805 1.000
## 5 21 2 0.826 0.0790 0.685 0.996
## 6 19 1 0.783 0.0860 0.631 0.971
##
## Genetic_Diversity=Low
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 34 2 0.941 0.0404 0.865 1.000
## 5 32 4 0.824 0.0654 0.705 0.962
## 6 28 2 0.765 0.0727 0.635 0.921
##################
################## PLOT
##################
plot_survival <- ggsurvfit::survfit2(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival) %>%
ggsurvfit::ggsurvfit(size = 2) +
labs( x = "Generation",
y = "Overall survival probability") +
ggsurvfit::add_confidence_interval(alpha = 0.1) +
ggsurvfit::add_censor_mark(size = 8, shape = "x") +
scale_fill_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400")) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400"))
plot_survival

#rm(plot_survival)
cowplot::save_plot(file = here::here("figures","Extinction_Survival_analysis.pdf"),
plot_survival, base_height = 12/cm(1), base_width = 18/cm(1), dpi = 610)
# COX representation
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block,
data = data_survival)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block,
data = data_survival)
anova(s1, s2)
## Analysis of Deviance Table
## Cox model: response is survival::Surv(Gen_eggs_extinct, status)
## Model 1: ~ Genetic_Diversity + Block
## Model 2: ~ Block
## loglik Chisq Df Pr(>|Chi|)
## 1 -43.520
## 2 -49.654 12.268 2 0.002168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic |
HR |
95% CI |
p-value |
| Genetic_Diversity |
|
|
|
| High |
— |
— |
|
| Medium |
347,496,586 |
0.00, Inf |
>0.9 |
| Low |
372,735,433 |
0.00, Inf |
>0.9 |
# The quantity of interest from a Cox regression model is a hazard ratio (HR). The HR represents the ratio of hazards between two groups at any particular point in time. The HR is interpreted as the instantaneous rate of occurrence of the event of interest in those who are still at risk for the event. It is not a risk, though it is commonly mis-interpreted as such. If you have a regression parameter β, then HR = exp(β).
#
# A HR < 1 indicates reduced hazard of death whereas a HR > 1 indicates an increased hazard of death.
#
# So the HR = 0.59 implies that 0.59 times as many females are dying as males, at any given time. Stated differently, females have a significantly lower hazard of death than males in these data.
Deal with the no
event in diverse issue
#https://stats.stackexchange.com/questions/124821/dealing-with-no-events-in-one-treatment-group-survival-analysis
##############
######## 2- Augmented Data: As in Scucz et al. 2017 - Ecology
##############
str(data_survival)
## 'data.frame': 84 obs. of 6 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
## $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Gen_eggs_extinct : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Survival : Factor w/ 2 levels "extinct","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ status : num 0 0 0 0 0 0 0 0 0 0 ...
data_survival_augmented <- rbind(data_survival,
c("HighX", "Block3", "High", "6", "extinct", "1"))
str(data_survival)
## 'data.frame': 84 obs. of 6 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
## $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Gen_eggs_extinct : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Survival : Factor w/ 2 levels "extinct","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ status : num 0 0 0 0 0 0 0 0 0 0 ...
str(data_survival_augmented)
## 'data.frame': 85 obs. of 6 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 2 2 2 2 2 2 2 1 2 ...
## $ Genetic_Diversity: Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Gen_eggs_extinct : chr "6" "6" "6" "6" ...
## $ Survival : Factor w/ 2 levels "extinct","yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ status : chr "0" "0" "0" "0" ...
data_survival_augmented$status <- as.numeric(data_survival_augmented$status)
data_survival_augmented$Gen_eggs_extinct <- as.numeric(data_survival_augmented$Gen_eggs_extinct)
#With real data
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block,
data = data_survival)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block,
data = data_survival)
anova(s1, s2)
## Analysis of Deviance Table
## Cox model: response is survival::Surv(Gen_eggs_extinct, status)
## Model 1: ~ Genetic_Diversity + Block
## Model 2: ~ Block
## loglik Chisq Df Pr(>|Chi|)
## 1 -43.520
## 2 -49.654 12.268 2 0.002168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#With augmented data
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block,
data = data_survival_augmented)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block,
data = data_survival_augmented)
anova(s1, s2)
## Analysis of Deviance Table
## Cox model: response is survival::Surv(Gen_eggs_extinct, status)
## Model 1: ~ Genetic_Diversity + Block
## Model 2: ~ Block
## loglik Chisq Df Pr(>|Chi|)
## 1 -51.535
## 2 -55.049 7.0288 2 0.02977 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival_augmented) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic |
HR |
95% CI |
p-value |
| Genetic_Diversity |
|
|
|
| High |
— |
— |
|
| Medium |
6.90 |
0.81, 59.0 |
0.078 |
| Low |
7.41 |
0.93, 59.2 |
0.059 |
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity-1,
data = data_survival_augmented)
sum_s1<-summary(s1)
sum_s1
## Call:
## survival::coxph(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity - 1, data = data_survival_augmented)
##
## n= 85, number of events= 14
##
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetic_DiversityMedium 1.931 6.895 1.096 1.762 0.0780 .
## Genetic_DiversityLow 2.002 7.407 1.061 1.888 0.0591 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## Genetic_DiversityMedium 6.895 0.145 0.8054 59.04
## Genetic_DiversityLow 7.407 0.135 0.9261 59.25
##
## Concordance= 0.654 (se = 0.057 )
## Likelihood ratio test= 6.33 on 2 df, p=0.04
## Wald test = 3.64 on 2 df, p=0.2
## Score (logrank) test = 4.98 on 2 df, p=0.08
confint(s1)
## 2.5 % 97.5 %
## Genetic_DiversityMedium -0.21646090 4.078164
## Genetic_DiversityLow -0.07673546 4.081714
exp(sum_s1$coefficients)
## coef exp(coef) se(coef) z Pr(>|z|)
## Genetic_DiversityMedium 6.895378 987.6992 2.990940 5.826339 1.081127
## Genetic_DiversityLow 7.407471 1648.2534 2.888821 6.603699 1.060855
survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival_augmented)
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity, data = data_survival_augmented)
##
## n events median 0.95LCL 0.95UCL
## Genetic_Diversity=High 28 1 NA NA NA
## Genetic_Diversity=Medium 23 5 NA NA NA
## Genetic_Diversity=Low 34 8 NA NA NA
summary(survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival_augmented))
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity, data = data_survival_augmented)
##
## Genetic_Diversity=High
## time n.risk n.event survival std.err lower 95% CI
## 6.0000 28.0000 1.0000 0.9643 0.0351 0.8979
## upper 95% CI
## 1.0000
##
## Genetic_Diversity=Medium
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 23 2 0.913 0.0588 0.805 1.000
## 5 21 2 0.826 0.0790 0.685 0.996
## 6 19 1 0.783 0.0860 0.631 0.971
##
## Genetic_Diversity=Low
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 34 2 0.941 0.0404 0.865 1.000
## 5 32 4 0.824 0.0654 0.705 0.962
## 6 28 2 0.765 0.0727 0.635 0.921
##############
######## 3- Consider only bottlenecked (LRT possible)
##############
data_survival_bottlenecked <- data_survival[data_survival$Genetic_Diversity!="High",]
data_survival_bottlenecked<-droplevels(data_survival_bottlenecked)
#With real data - But only bottlenecked
s1 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity + Block,
data = data_survival_bottlenecked)
s2 <- survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Block,
data = data_survival_bottlenecked)
anova(s1, s2)
## Analysis of Deviance Table
## Cox model: response is survival::Surv(Gen_eggs_extinct, status)
## Model 1: ~ Genetic_Diversity + Block
## Model 2: ~ Block
## loglik Chisq Df Pr(>|Chi|)
## 1 -43.520
## 2 -43.574 0.1084 1 0.742
survival::coxph(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival_bottlenecked) %>%
gtsummary::tbl_regression(exp = TRUE)
| Characteristic |
HR |
95% CI |
p-value |
| Genetic_Diversity |
|
|
|
| Medium |
— |
— |
|
| Low |
1.07 |
0.35, 3.28 |
>0.9 |
##############
######## Good- Comparison without estimates
##############
#Log Rank Test in R, the most frequent technique to compare survival curves between two groups is to use a log-rank test.
# Test hypotheses:
# Ho: In terms of survivability, there is no difference between the two groups.
# Hi: There is a survival differential between the two groups.
# We can reject the null hypothesis and infer that there is enough evidence to claim there is a difference in survival between the two groups if the p-value of the test is less than 0.05 (95% confidence level).
#Comparison
survival::survdiff(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival)
## Call:
## survival::survdiff(formula = survival::Surv(Gen_eggs_extinct,
## status) ~ Genetic_Diversity, data = data_survival)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## Genetic_Diversity=High 27 0 4.41 4.405 7.00
## Genetic_Diversity=Medium 23 5 3.44 0.707 1.01
## Genetic_Diversity=Low 34 8 5.15 1.571 2.73
##
## Chisq= 7 on 2 degrees of freedom, p= 0.03
#The Chi-Squared test statistic is 7 with 2 degree of freedom and the corresponding p-value is 0.03.
#Since this p-value is less than 0.05, we reject the null hypothesis.
#In other words, we have sufficient evidence to say that there is a statistically significant difference in survival between the three groups.
# survfit(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
# summary(survfit(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival))
#
#
Other
representation
### OTHER REPRESENTATION :
s2 <- survival::survfit(survival::Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity,
data = data_survival)
summary(s2)
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity, data = data_survival)
##
## Genetic_Diversity=High
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
##
## Genetic_Diversity=Medium
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 23 2 0.913 0.0588 0.805 1.000
## 5 21 2 0.826 0.0790 0.685 0.996
## 6 19 1 0.783 0.0860 0.631 0.971
##
## Genetic_Diversity=Low
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 4 34 2 0.941 0.0404 0.865 1.000
## 5 32 4 0.824 0.0654 0.705 0.962
## 6 28 2 0.765 0.0727 0.635 0.921
print(s2)
## Call: survfit(formula = survival::Surv(Gen_eggs_extinct, status) ~
## Genetic_Diversity, data = data_survival)
##
## n events median 0.95LCL 0.95UCL
## Genetic_Diversity=High 27 0 NA NA NA
## Genetic_Diversity=Medium 23 5 NA NA NA
## Genetic_Diversity=Low 34 8 NA NA NA
# Access to the sort summary table
summary(s2)$table
## records n.max n.start events rmean se(rmean)
## Genetic_Diversity=High 27 27 27 0 6.000000 0.00000000
## Genetic_Diversity=Medium 23 23 23 5 5.739130 0.12627260
## Genetic_Diversity=Low 34 34 34 8 5.764706 0.09355367
## median 0.95LCL 0.95UCL
## Genetic_Diversity=High NA NA NA
## Genetic_Diversity=Medium NA NA NA
## Genetic_Diversity=Low NA NA NA
d <- data.frame(time = s2$time,
n.risk = s2$n.risk,
n.event = s2$n.event,
n.censor = s2$n.censor,
surv = s2$surv,
upper = s2$upper,
lower = s2$lower)
head(d)
## time n.risk n.event n.censor surv upper lower
## 1 6 27 0 27 1.0000000 1.0000000 1.0000000
## 2 4 23 2 0 0.9130435 1.0000000 0.8048548
## 3 5 21 2 0 0.8260870 0.9964666 0.6848395
## 4 6 19 1 18 0.7826087 0.9707088 0.6309579
## 5 4 34 2 0 0.9411765 1.0000000 0.8653187
## 6 5 32 4 0 0.8235294 0.9621763 0.7048611
plot2 <- survminer::ggsurvplot(s2,
pval = TRUE, # show p-value of log-rank test.
#pval.coord = c(1,0.2),
pval.size = 5,
conf.int = TRUE, # show confidence intervals for point estimaes of survival curves.
# conf.int.style = "step", # customize style of confidence intervals
conf.int.style = "ribbon", # customize style of confidence intervals
conf.int.alpha = 0.3, # customize style of confidence intervals
censor = TRUE,
censor.shape = "x",
censor.size = 6,
xlab = "Generation", # customize X axis label.
break.time.by = 1, # break X axis in time intervals by 1
ggtheme = theme_light(), # customize plot and risk table with a theme.
#risk.table = TRUE, # Add risk table
#linetype = "strata", # Change line type by groups
risk.table = "abs_pct", # absolute number and percentage at risk.
risk.table.y.text.col = T,# colour risk table text annotations.
risk.table.y.text = FALSE,# show bars instead of names in text annotations in legend of risk table.
risk.table.col = "strata", # Change risk table color by groups
legend.labs = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
legend = c("right"),
legend.title = c("Demographic\nhistory:"),
palette = c("#00AFBB", "#E7B800", "#FC4E07"))
plot2

#
# cowplot::save_plot(file = here::here("figures","Extinction_Survival_analysis_V2.pdf"),
# plot2, base_height = 10/cm(1), base_width = 15/cm(1), dpi = 610)
# Fit a Cox proportional hazards model
# fit.coxph <- coxph(Surv(Gen_eggs_extinct, status) ~ Genetic_Diversity, data = data_survival)
# ggforest(fit.coxph, data = data_survival)
Probability of
extinction
############
###### Clean dataset
############
#Prepare clean dataset
data_proba_extinction <- data[data$Generation_Eggs==6,
c("Block","Population","Genetic_Diversity","Extinction_finalstatus")]
data_proba_extinction$Extinction_finalstatus <- as.factor(data_proba_extinction$Extinction_finalstatus)
data_proba_extinction$y <- ifelse(data_proba_extinction$Extinction_finalstatus == "yes", 1, 0)
############
###### Analysis
############
# #Analysis
# mod0 <- glm(y ~ Genetic_Diversity + Block,
# data = data_proba_extinction, family = binomial)
#
# mod0 <- glm(y ~ Genetic_Diversity-1 + Block,
# data = data_proba_extinction, family = binomial)
#
#
# #But issue of convergence
# plogis(confint(mod0))
# ############
# ###### Analysis
# ############
#
#
# summary(mod0)
#
#
#
# #Test convergence
# mod1 <- glm(y ~ Genetic_Diversity + Block ,
# data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",],
# family = binomial)
#
# #Test genetic diversity effect
# mod0 <- glm(y ~ Block ,
# data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",], family = binomial)
# anova(mod0, mod1, test = "Chisq")
# #We remove genetic diversity
#
# # Perform the lrtest
# (A <- logLik(mod1))
# (B <- logLik(mod0))
# #Since the logLik() function gives more information than the numeric value, use the as.numeric() function to isolate the numeric value
# (teststat <- -2 * (as.numeric(A)-as.numeric(B)))
# #df = 5 - 3 = 2
# (p.val <- pchisq(teststat, df = 2, lower.tail = FALSE))
#
# lmtest::lrtest(mod1, mod0)
############
###### CONFIDENCE INTERVAL
############
# mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,
# data = data_proba_extinction[data_proba_extinction$Genetic_Diversity!="High",], family = binomial)
# summary(mod0)
#
# #Extract probability of extinction
# #(p_high <- plogis(-21.3144))
# (p_med <- plogis(-3.0142))
# (p_low <- plogis(-2.8082))
#
#
# # Confidence interval
# table_raw <- confint(mod0)
# table_ci <- plogis(table_raw)
# table_ci
#
# ############
# ###### Predicted value
# ############
#
#
# data_predict_extinct <- data.frame(Genetic_Diversity = levels(data_proba_extinction$Genetic_Diversity),
# Block = "Block4")
#
# data_predict_extinct$predict <- predict(mod0, type = "response",
# re.form = NA,
# newdata = data_predict_extinct)
#
# data_predict_extinct
# p_high
# p_med
# p_low
#
#
# ############
# ###### Posthoc
# ############
# emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
#
##############
######## 1- REAL DATA: issue with high no extinction
##############
mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,
data = data_proba_extinction, family = binomial)
summary(mod0)
##
## Call:
## glm(formula = y ~ Genetic_Diversity - 1 + Block, family = binomial,
## data = data_proba_extinction)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.26011 -0.44258 -0.30957 -0.00005 2.47474
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Genetic_DiversityHigh -21.3144 1855.7435 -0.011 0.99084
## Genetic_DiversityMedium -3.0142 1.1293 -2.669 0.00760 **
## Genetic_DiversityLow -2.8082 1.0659 -2.635 0.00843 **
## BlockBlock4 0.7402 1.2714 0.582 0.56046
## BlockBlock5 3.0006 1.1265 2.664 0.00773 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 116.449 on 84 degrees of freedom
## Residual deviance: 46.833 on 79 degrees of freedom
## AIC: 56.833
##
## Number of Fisher Scoring iterations: 18
(p_high <- plogis(-21.3144))
## [1] 5.536989e-10
(p_med <- plogis(-3.0142))
## [1] 0.04678847
(p_low <- plogis(-2.8082))
## [1] 0.05688267
# Confidence interval: high [0,1]
plogis(confint(mod0))
## 2.5 % 97.5 %
## Genetic_DiversityHigh 1.689103e-236 1.0000000
## Genetic_DiversityMedium 2.415647e-03 0.2298311
## Genetic_DiversityLow 3.204498e-03 0.2431697
## BlockBlock4 1.553712e-01 0.9794348
## BlockBlock5 7.579725e-01 0.9975064
## ISSUE: DOESNT CONVERGE
mod0 <- glm(y ~ Genetic_Diversity + Block ,
data = data_proba_extinction, family = binomial)
mod1 <- glm(y ~ Block ,
data = data_proba_extinction, family = binomial)
anova(mod1,mod0,test="Chisq")
## Analysis of Deviance Table
##
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 81 58.903
## 2 79 46.833 2 12.069 0.002394 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##############
######## 2- Augmented Data: As in Scucz et al. 2017 - Ecology
##############
# data_proba_extinction_augmented <- rbind(data_proba_extinction,
# # c("Block3","HighX","High","no",0),
# # c("Block3","LowX","Low","no",0),
# # c("Block3","MedX","Medium","no",0),
# # c("Block3","LowY","Low","yes",1),
# # c("Block3","MedXY","Medium","yes",1),
# c("Block3","HighY","High","yes",1))
data_proba_extinction_augmented <- rbind(data_proba_extinction,
c("Block3","HighY","High","yes",1))
data_proba_extinction_augmented$y <- as.numeric(data_proba_extinction_augmented$y)
mod0 <- glm(y ~ Genetic_Diversity-1 + Block ,
data = data_proba_extinction_augmented, family = binomial)
summary(mod0)
##
## Call:
## glm(formula = y ~ Genetic_Diversity - 1 + Block, family = binomial,
## data = data_proba_extinction_augmented)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1885 -0.4686 -0.4349 -0.1538 2.9692
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Genetic_DiversityHigh -4.39594 1.28253 -3.428 0.000609 ***
## Genetic_DiversityMedium -2.31099 0.86770 -2.663 0.007737 **
## Genetic_DiversityLow -2.11810 0.79597 -2.661 0.007790 **
## BlockBlock4 -0.03575 1.05172 -0.034 0.972886
## BlockBlock5 2.14410 0.86309 2.484 0.012984 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 117.835 on 85 degrees of freedom
## Residual deviance: 58.406 on 80 degrees of freedom
## AIC: 68.406
##
## Number of Fisher Scoring iterations: 6
#Real data
(p_high <- plogis(-21.3144))
## [1] 5.536989e-10
(p_med <- plogis(-3.0142))
## [1] 0.04678847
(p_low <- plogis(-2.8082))
## [1] 0.05688267
#Augmented data
(p_high <- plogis(-3.9197))
## [1] 0.01946081
(p_med <- plogis(-1.6788))
## [1] 0.1572544
(p_low <- plogis(-1.5627))
## [1] 0.1732596
#Augmented data with only extinction
(p_high <- plogis(-4.39594))
## [1] 0.01217718
(p_med <- plogis(-2.31099))
## [1] 0.09021685
(p_low <- plogis(-2.11810))
## [1] 0.10735
# Confidence interval: high [0,1]
plogis(confint(mod0))
## 2.5 % 97.5 %
## Genetic_DiversityHigh 0.0004918074 0.09139237
## Genetic_DiversityMedium 0.0127711273 0.30480717
## Genetic_DiversityLow 0.0174349180 0.31771819
## BlockBlock4 0.0963225771 0.89727408
## BlockBlock5 0.6478312906 0.98431182
mod0 <- glm(y ~ Genetic_Diversity + Block ,
data = data_proba_extinction_augmented, family = binomial)
mod1 <- glm(y ~ Block ,
data = data_proba_extinction_augmented, family = binomial)
anova(mod1,mod0,test="Chisq")
## Analysis of Deviance Table
##
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 82 64.685
## 2 80 58.406 2 6.2787 0.04331 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##############
######## 3- PenaliZed Logistic Regression data
##############
fit<-logistf::logistf(y ~ Genetic_Diversity-1 + Block,
data=data_proba_extinction)
summary(fit)
## logistf::logistf(formula = y ~ Genetic_Diversity - 1 + Block,
## data = data_proba_extinction)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## Genetic_DiversityHigh -5.5403989 1.6381117 -10.6142028 -2.9271591 31.0874311
## Genetic_DiversityMedium -2.5593086 0.9220804 -4.9074011 -0.9845788 12.1477934
## Genetic_DiversityLow -2.3929983 0.8625221 -4.6407993 -0.9400804 12.5897375
## BlockBlock4 0.5511409 1.0469817 -1.5552881 3.0070956 0.2677213
## BlockBlock5 2.5551098 0.9273661 0.9131677 4.8771647 10.2850062
## p method
## Genetic_DiversityHigh 2.466634e-08 2
## Genetic_DiversityMedium 4.914598e-04 2
## Genetic_DiversityLow 3.878706e-04 2
## BlockBlock4 6.048644e-01 2
## BlockBlock5 1.341156e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=29.25035 on 5 df, p=2.070724e-05, n=84
## Wald test = 23.62508 on 5 df, p = 0.0002562504
nobs(fit)
## [1] 84
# Test genetic diversity effect
mod0<-logistf::logistf(y ~ Genetic_Diversity + Block,
data=data_proba_extinction)
drop1(mod0)
## ChiSq df P-value
## Genetic_Diversity 9.394033 2 0.009122452
## Block 12.698693 2 0.001747889
extractAIC(mod0)
## null
## 4.00000 29.29027
mod0<-logistf::logistf(y ~ Genetic_Diversity + Block,
data=data_proba_extinction)
mod1<-logistf::logistf(y ~ Block,
data=data_proba_extinction)
anova(mod1,mod0)
## Comparison of logistf models:
## Formula ChiSquared
## 1 y ~ Genetic_Diversity + Block 21.29027
## 2 y ~ Block 11.89623
##
## Method: nested
## Chi-Squared: 9.394033 df= 2 P= 0.009122452
#lmtest::lrtest(mod1,mod0)
### Estimates and CI
fit<-logistf::logistf(y ~ Genetic_Diversity-1 + Block,
data=data_proba_extinction)
sum_p_extinct <- summary(fit)
## logistf::logistf(formula = y ~ Genetic_Diversity - 1 + Block,
## data = data_proba_extinction)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## Genetic_DiversityHigh -5.5403989 1.6381117 -10.6142028 -2.9271591 31.0874311
## Genetic_DiversityMedium -2.5593086 0.9220804 -4.9074011 -0.9845788 12.1477934
## Genetic_DiversityLow -2.3929983 0.8625221 -4.6407993 -0.9400804 12.5897375
## BlockBlock4 0.5511409 1.0469817 -1.5552881 3.0070956 0.2677213
## BlockBlock5 2.5551098 0.9273661 0.9131677 4.8771647 10.2850062
## p method
## Genetic_DiversityHigh 2.466634e-08 2
## Genetic_DiversityMedium 4.914598e-04 2
## Genetic_DiversityLow 3.878706e-04 2
## BlockBlock4 6.048644e-01 2
## BlockBlock5 1.341156e-03 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=29.25035 on 5 df, p=2.070724e-05, n=84
## Wald test = 23.62508 on 5 df, p = 0.0002562504
# Confidence interval:
plogis(sum_p_extinct$coefficients)
## Genetic_DiversityHigh Genetic_DiversityMedium Genetic_DiversityLow
## 0.003909616 0.071803609 0.083708170
## BlockBlock4 BlockBlock5
## 0.634400249 0.927916044
plogis(sum_p_extinct$ci.lower)
## Genetic_DiversityHigh Genetic_DiversityMedium Genetic_DiversityLow
## 2.456403e-05 7.337438e-03 9.557749e-03
## BlockBlock4 BlockBlock5
## 1.743238e-01 7.136479e-01
plogis(sum_p_extinct$ci.upper)
## Genetic_DiversityHigh Genetic_DiversityMedium Genetic_DiversityLow
## 0.05082721 0.27198420 0.28088410
## BlockBlock4 BlockBlock5
## 0.95289365 0.99243902
data.frame(coef = plogis(sum_p_extinct$coefficients),
ci_low = plogis(sum_p_extinct$ci.lower),
ci_upper = plogis(sum_p_extinct$ci.upper))
## coef ci_low ci_upper
## Genetic_DiversityHigh 0.003909616 2.456403e-05 0.05082721
## Genetic_DiversityMedium 0.071803609 7.337438e-03 0.27198420
## Genetic_DiversityLow 0.083708170 9.557749e-03 0.28088410
## BlockBlock4 0.634400249 1.743238e-01 0.95289365
## BlockBlock5 0.927916044 7.136479e-01 0.99243902
# Probability
(p_high <- plogis(-5.5403989))
## [1] 0.003909616
(p_med <- plogis(-2.5593086))
## [1] 0.07180361
(p_low <- plogis(-2.3929983))
## [1] 0.08370817
plogis(confint(fit))
## Lower 95% Upper 95%
## Genetic_DiversityHigh 2.456403e-05 0.05082721
## Genetic_DiversityMedium 7.337438e-03 0.27198420
## Genetic_DiversityLow 9.557749e-03 0.28088410
## BlockBlock4 1.743238e-01 0.95289365
## BlockBlock5 7.136479e-01 0.99243902
#plogis(sum_p_extinct)
Time to
extinction
data_timeextinction<-data[data$First_Throw=="extinct",]
data_timeextinction <- data_timeextinction[complete.cases(data_timeextinction$Nb_adults), ]
data_timeextinction
## Population Block Old_Label Label
## 207 Low16 Block4 B4-O1 8/18 BE B4 | G6 Low 16
## 274 Low27 Block5 B5-B1 7/21 BE B5 | G5 Low 27
## 277 Low28 Block5 B5-E1 6/16 BE B5 | G4 Low 28
## 296 Low30 Block5 B5-D1 7/21 BE B5 | G5 Low 30
## 301 Low31 Block5 B(2)5-P1 7/21 BE B5 | G5 Low 31
## 309 Low32 Block5 B(2)5-Q1 6/16 BE B5 | G4 Low 32
## 318 Low33 Block5 B(2)5-T1 8/25 BE B5 | G6 Low 33
## 335 Low36 Block5 B(2)5-X1 7/21 BE B5 | G5 Low 36
## 397 Med14 Block4 B4-Backup Fam L 7/14 BE B4 | G5 Med 14
## 410 Med17 Block5 B5 - Backup Fam F 6/16 BE B5 | G4 Med 17
## 438 Med20 Block5 B5 - Backup Fam I 7/21 BE B5 | G5 Med 20
## 448 Med22 Block5 B5-Backup Fam B 6/16 BE B5 | G4 Med 22
## 476 Med5 Block3 B3 - Backup Fam E 8/11 BE B3 | G6 Med 5
## Genetic_Diversity Generation_Parents Generation_Eggs Date_Census
## 207 Low 5 6 8/18/21
## 274 Low 4 5 7/21/21
## 277 Low 3 4 6/16/21
## 296 Low 4 5 7/21/21
## 301 Low 4 5 7/21/21
## 309 Low 3 4 6/16/21
## 318 Low 5 6 8/25/21
## 335 Low 4 5 7/21/21
## 397 Medium 4 5 7/14/21
## 410 Medium 3 4 6/16/21
## 438 Medium 4 5 7/21/21
## 448 Medium 3 4 6/16/21
## 476 Medium 5 6 8/11/21
## Date_Start_Eggs Date_End_Eggs Image_Box1 Image_Box2 MC_Box1 MC_Box2
## 207 8/18/21 8/19/21 <NA> <NA> NA NA
## 274 7/21/21 7/22/21 <NA> <NA> 0 0
## 277 6/16/21 6/17/21 <NA> <NA> 0 0
## 296 7/21/21 7/22/21 <NA> <NA> 0 0
## 301 7/21/21 7/22/21 <NA> <NA> 0 0
## 309 6/16/21 6/17/21 <NA> <NA> 0 0
## 318 8/25/21 8/26/21 <NA> <NA> NA NA
## 335 7/21/21 7/22/21 <NA> <NA> 0 0
## 397 7/14/21 7/15/21 <NA> <NA> 0 0
## 410 6/16/21 6/17/21 <NA> <NA> 0 0
## 438 7/21/21 7/22/21 <NA> <NA> 0 0
## 448 6/16/21 6/17/21 <NA> <NA> 0 0
## 476 8/11/21 8/12/21 <NA> <NA> 0 0
## MC_Total_Adults HC_Box1 HC_Box2 HC_Total_Adults Nb_parents Growth_rate
## 207 0 0 0 0 2 0
## 274 0 0 0 0 3 0
## 277 0 0 0 0 2 0
## 296 0 0 0 0 1 0
## 301 0 0 0 0 2 0
## 309 0 0 0 0 127 0
## 318 0 NA NA 0 2 0
## 335 0 0 0 0 1 0
## 397 0 NA NA 0 8 NA
## 410 0 0 0 0 5 0
## 438 0 0 0 0 1 0
## 448 0 0 0 0 16 0
## 476 0 NA NA 0 1 0
## First_Throw Extinction_finalstatus Less_than_5 Nb_adults Lambda obs
## 207 extinct yes yes 0 0 462
## 274 extinct yes yes 0 0 403
## 277 extinct yes yes 0 0 320
## 296 extinct yes yes 0 0 406
## 301 extinct yes yes 0 0 407
## 309 extinct yes yes 0 0 324
## 318 extinct yes yes 0 0 493
## 335 extinct yes yes 0 0 412
## 397 extinct yes yes 0 0 392
## 410 extinct yes yes 0 0 329
## 438 extinct yes yes 0 0 416
## 448 extinct yes yes 0 0 334
## 476 extinct yes yes 0 0 443
## gen_square He_start He_lost_gen_t He_remain He_exp He_end
## 207 36 0.5544299 NA NA 0.6449276 0.3575672
## 274 25 0.5367998 NA NA 0.8056432 0.4324691
## 277 16 0.5386585 NA NA 0.7452523 0.4014365
## 296 25 0.5540444 NA NA 0.4950540 0.2742819
## 301 25 0.5532775 NA NA 0.7440450 0.4116634
## 309 16 0.5528880 NA NA 0.9892872 0.5469651
## 318 36 0.5523333 NA NA 0.6083089 0.3359893
## 335 25 0.5427371 NA NA 0.4855518 0.2635269
## 397 25 0.6802331 NA NA 0.9285014 0.6315974
## 410 16 0.6825509 NA NA 0.8944237 0.6104897
## 438 25 0.6819650 NA NA 0.4841994 0.3302071
## 448 16 0.6809168 NA NA 0.9614965 0.6546991
## 476 36 0.6807065 NA NA 0.4714889 0.3209456
## ID_extinction
## 207 extant
## 274 extant
## 277 extant
## 296 extant
## 301 extant
## 309 extant
## 318 extant
## 335 extant
## 397 extant
## 410 extant
## 438 extant
## 448 extant
## 476 extant
tapply(data_timeextinction$Generation_Eggs,data_timeextinction$Genetic_Diversity,mean)
## High Medium Low
## NA 4.8 5.0
#Test time of extinction
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity + Block,
data = data_timeextinction, family = "poisson")
mod0 <- glm(Generation_Eggs ~ Block,
data = data_timeextinction, family = "poisson")
anova(mod0, mod1, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 10 0.95459
## 2 9 0.74888 1 0.20571 0.6502
#We keep genetic diversity
#Check for overdispersion
sqrt(deviance(mod1)/(nobs(mod1)-length(coef(mod1))))
## [1] 0.2884598
sigma(mod1)
## [1] 0.2884598
summary(mod1)
##
## Call:
## glm(formula = Generation_Eggs ~ Genetic_Diversity + Block, family = "poisson",
## data = data_timeextinction)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.41035 -0.13936 0.05512 0.05512 0.49031
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7918 0.4082 4.389 1.14e-05 ***
## Genetic_DiversityLow 0.1295 0.2877 0.450 0.653
## BlockBlock4 -0.1539 0.5301 -0.290 0.772
## BlockBlock5 -0.3366 0.4813 -0.699 0.484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1.40752 on 12 degrees of freedom
## Residual deviance: 0.74888 on 9 degrees of freedom
## AIC: 53.668
##
## Number of Fisher Scoring iterations: 4
#Residual deviance / df
summary(mod1)
##
## Call:
## glm(formula = Generation_Eggs ~ Genetic_Diversity + Block, family = "poisson",
## data = data_timeextinction)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.41035 -0.13936 0.05512 0.05512 0.49031
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.7918 0.4082 4.389 1.14e-05 ***
## Genetic_DiversityLow 0.1295 0.2877 0.450 0.653
## BlockBlock4 -0.1539 0.5301 -0.290 0.772
## BlockBlock5 -0.3366 0.4813 -0.699 0.484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 1.40752 on 12 degrees of freedom
## Residual deviance: 0.74888 on 9 degrees of freedom
## AIC: 53.668
##
## Number of Fisher Scoring iterations: 4
0.74888/9
## [1] 0.08320889
AER::dispersiontest(mod1)
##
## Overdispersion test
##
## data: mod1
## z = -25.921, p-value = 1
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 0.05772897
# # Add random obs effect
# data_dyn$obs<-as.factor(1:nrow(data_dyn))
# Perform the lrtest
(A <- logLik(mod1))
## 'log Lik.' -22.83384 (df=4)
(B <- logLik(mod0))
## 'log Lik.' -22.9367 (df=3)
#Since the logLik() function gives more information than the numeric value, use the as.numeric() function to isolate the numeric value
(teststat <- -2 * (as.numeric(A)-as.numeric(B)))
## [1] -0.2057062
#df = 5 - 3 = 2
(p.val <- pchisq(teststat, df = 2, lower.tail = FALSE))
## [1] 1
lmtest::lrtest(mod0,mod1)
## Likelihood ratio test
##
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 3 -22.937
## 2 4 -22.834 1 0.2057 0.6502
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity-1 + Block,
data = data_timeextinction, family = "poisson")
sum_mod1 <- summary(mod1)
datatransformed <- confint(mod1)
exp(datatransformed)
## 2.5 % 97.5 %
## Genetic_DiversityMedium 2.3845707 12.157137
## Genetic_DiversityLow 2.3627659 17.279049
## BlockBlock4 0.3099881 2.578667
## BlockBlock5 0.2925632 1.999911
exp(sum_mod1$coefficients)
## Estimate Std. Error z value Pr(>|z|)
## Genetic_DiversityMedium 6.0000000 1.504181 80.5514766 1.000011
## Genetic_DiversityLow 6.8296508 1.647836 46.8373186 1.000120
## BlockBlock4 0.8573889 1.699154 0.7480860 2.163300
## BlockBlock5 0.7142037 1.618161 0.4969116 1.623100
exp(1.568)
## [1] 4.797045
exp(1.6094)
## [1] 4.99981
Population size over
time
GAMMM Model
Basic model
# https://jacolienvanrij.com/Tutorials/GAMM.html
# install.packages("itsadug")
# packageVersion('mgcv')
# packageVersion('itsadug')
data_dyn <- data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",]
str(data_dyn)
## 'data.frame': 355 obs. of 33 variables:
## $ Population : Factor w/ 84 levels "High1","High13",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Block : Factor w/ 3 levels "Block3","Block4",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ Old_Label : chr "B3 All red mix " "B3 All red mix " "B3_All_Red_Mix " "B3_All_Red_Mix " ...
## $ Label : chr "7/7 BE B3 | G5 High 1" "6/2 BE B3 | G4 High 1" "4/28 BE B3 | G3 High 1" "BE B3 3/24 | G2 High1" ...
## $ Genetic_Diversity : Factor w/ 3 levels "High","Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Generation_Parents : int 4 3 2 1 5 5 3 2 1 4 ...
## $ Generation_Eggs : int 5 4 3 2 6 6 4 3 2 5 ...
## $ Date_Census : chr "7/7/21" "6/2/21" "4/28/21" "3/24/21" ...
## $ Date_Start_Eggs : chr "7/7/21" "6/2/21" "4/28/21" "3/24/21" ...
## $ Date_End_Eggs : chr "7/8/21" "6/3/21" "4/29/21" "3/25/21" ...
## $ Image_Box1 : chr "DSC_0779" "DSC_0206" "DSC_0585" "DSC_0920" ...
## $ Image_Box2 : chr "DSC_0780" "DSC_0207" "DSC_0586" "DSC_0921" ...
## $ MC_Box1 : num 26.6 85.4 82.3 266.9 100.1 ...
## $ MC_Box2 : num 5.15 90.72 78.01 149.8 74.38 ...
## $ MC_Total_Adults : num 31.8 176.1 160.3 416.7 174.5 ...
## $ HC_Box1 : int 27 79 83 253 64 47 71 60 101 47 ...
## $ HC_Box2 : int 4 96 82 135 58 80 118 113 214 44 ...
## $ HC_Total_Adults : int 31 175 165 388 122 127 189 173 315 91 ...
## $ Nb_parents : num 175 165 388 100 31 91 173 315 100 189 ...
## $ Growth_rate : num 0.177 1.061 0.425 3.88 3.935 ...
## $ First_Throw : chr "no" "no" "no" "no" ...
## $ Extinction_finalstatus: Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Less_than_5 : chr "no" "no" "no" "no" ...
## $ Nb_adults : int 31 175 165 388 122 127 189 173 315 91 ...
## $ Lambda : num 0.177 1.061 0.425 3.88 3.935 ...
## $ obs : Factor w/ 504 levels "1","2","3","4",..: 337 253 169 85 421 447 279 195 111 363 ...
## $ gen_square : num 25 16 9 4 36 36 16 9 4 25 ...
## $ He_start : num 0.999 0.999 0.999 0.999 0.999 ...
## $ He_lost_gen_t : num 0.984 0.997 0.997 0.999 0.996 ...
## $ He_remain : num 0.971 0.986 0.989 0.992 0.967 ...
## $ He_exp : num 0.968 0.968 0.968 0.968 0.968 ...
## $ He_end : num 0.967 0.967 0.967 0.967 0.967 ...
## $ ID_extinction : chr "extant" "extant" "extant" "extant" ...
# 1-dimensional smooth of Time:
#m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs), data=data_dyn)
#Meaning the GAM has failed to construct a sensible smooth term for this data. You can limit the maximum df of the smooth using the k parameter:
m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, k=3), family = "poisson", data=data_dyn)
#number of ‘knots’. This parameter determines the upper bound of the number of underlying base functions being used to build up the curve. Thus, this parameter constraints the wigglyness of a smooth, or - as a metaphor - the number of bowpoints of a curve.
#Note that the model will base the number of base functions (reflected in the edf of the summary) on the data with the setting for k as upper bound. By default, the value of k for s() is around 9, and for te() and ti() 5 per dimension. Importantly, the value of k should be at most one less than the number of unique data points, otherwise it will fit the density of that predictor.
# interaction with non-isotropicterms:
#m2 <- gam(Nb_adults ~ te(Generation_Eggs, Genetic_Diversity), data=data_dyn)
# decompose the same interaction in main effects + interaction:
# (note: include main effects)
# m3 <- gam(Nb_adults ~ s(Generation_Eggs) + s(Genetic_Diversity) +
# + ti(Generation_Eggs, Genetic_Diversity),
# data=data_dyn)
# s() : for modeling a 1-dimensional smooth, or for modeling isotropic interactions (variables are measured in same units and on same scale)
# te(): for modeling 2- or n-dimensional interaction surfaces of variables that are not isotropic (but see info about d parameter below). Includes ‘main’ effects.
# ti(): for modeling 2- or n-dimensional interaction surfaces that do not include the ‘main effects
###########
########### ADD FACTOR EFFECT:
###########
# How to set up the interaction depends on the type of grouping predictor:
# with factor include intercept difference: Group + s(Time, by=Group).
# with ordered factor include intercept difference and reference smooth: Group + s(Time) + s(Time, by=Group).
# with binary predictor include reference smooth: s(Time) + s(Time, by=IsGroupChildren).
m.factor <- mgcv::gam(Nb_adults ~ Genetic_Diversity + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)
summary(m.factor)
##
## Family: poisson
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity + s(Generation_Eggs, k = 3) + s(Generation_Eggs,
## by = Genetic_Diversity, k = 3)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.048711 0.007184 702.75 <2e-16 ***
## Genetic_DiversityMedium -0.364241 0.012871 -28.30 <2e-16 ***
## Genetic_DiversityLow -0.448867 0.011778 -38.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 1.96929 1.99358 3669.85 <2e-16 ***
## s(Generation_Eggs):Genetic_DiversityHigh 1.92775 1.98689 200.26 <2e-16 ***
## s(Generation_Eggs):Genetic_DiversityMedium 0.01232 0.01357 0.00 0.991
## s(Generation_Eggs):Genetic_DiversityLow 1.96596 1.99514 83.68 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 10/11
## R-sq.(adj) = 0.631 Deviance explained = 57.3%
## UBRE = 32.587 Scale est. = 1 n = 353
#The smooth terms summary shows three smooths that are significantly different from 0.
#We cannot conclude from the summary that the two curves are different from each other.
m.factor <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=3),family = "poisson", data=data_dyn)
summary(m.factor)
##
## Family: poisson
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 3) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 3)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.112244 0.009925 515.080 < 2e-16 ***
## Genetic_DiversityMedium -0.371509 0.012939 -28.713 < 2e-16 ***
## Genetic_DiversityLow -0.471075 0.011911 -39.549 < 2e-16 ***
## BlockBlock4 -0.043703 0.010311 -4.239 2.25e-05 ***
## BlockBlock5 -0.157407 0.012266 -12.833 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 1.9693 1.9935 2912.46 < 2e-16 ***
## s(Generation_Eggs):Genetic_DiversityHigh 1.9286 1.9870 159.13 < 2e-16 ***
## s(Generation_Eggs):Genetic_DiversityMedium 1.0124 1.0137 17.21 3.07e-05 ***
## s(Generation_Eggs):Genetic_DiversityLow 0.9659 0.9951 79.53 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 12/13
## R-sq.(adj) = 0.623 Deviance explained = 58%
## UBRE = 32.112 Scale est. = 1 n = 353
###########
########### ADD RANDOM EFFECT:
###########
# random intercepts adjust the height of other modelterms with a constant value: s(Subject, bs="re").
# random slopes adjust the slope ofthe trend of a numeric predictor: s(Subject, Time, bs="re").
# random smooths adjust the trend of a numeric predictor in a nonlinear way: s(Time, Subject, bs="fs", m=1).
m.mixed <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=3) +
s(Population, bs="re") , family = "poisson", data=data_dyn)
summary(m.mixed)
##
## Family: poisson
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 3) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 3) + s(Population,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.09773 0.07801 65.347 < 2e-16 ***
## Genetic_DiversityMedium -0.39811 0.08934 -4.456 8.34e-06 ***
## Genetic_DiversityLow -0.48810 0.08143 -5.994 2.05e-09 ***
## BlockBlock4 -0.03068 0.08006 -0.383 0.7016
## BlockBlock5 -0.18745 0.09303 -2.015 0.0439 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 1.96948 1.99405 3715.94 <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh 1.92487 1.98702 200.33 <2e-16
## s(Generation_Eggs):Genetic_DiversityMedium 0.01073 0.01185 0.00 0.992
## s(Generation_Eggs):Genetic_DiversityLow 1.96684 1.99558 84.64 <2e-16
## s(Population) 64.77416 66.00000 2603.25 <2e-16
##
## s(Generation_Eggs) ***
## s(Generation_Eggs):Genetic_DiversityHigh ***
## s(Generation_Eggs):Genetic_DiversityMedium
## s(Generation_Eggs):Genetic_DiversityLow ***
## s(Population) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 83/84
## R-sq.(adj) = 0.642 Deviance explained = 68%
## UBRE = 24.585 Scale est. = 1 n = 353
Model
selection
###########
########### AIC
###########
m1 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)
m2 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=4), family = "poisson", data=data_dyn)
m3 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m4 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=3), family = "poisson", data=data_dyn)
m5 <- mgcv::gam(Nb_adults ~ Block+
s(Generation_Eggs, by= Genetic_Diversity, k=4), family = "poisson", data=data_dyn)
m6 <- mgcv::gam(Nb_adults ~ Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m7 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=5), family = "poisson", data=data_dyn)
m8 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=3) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m9 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=4) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m10 <- mgcv::gam(Nb_adults ~ s(Generation_Eggs, by= Genetic_Diversity, k=5)+
s(Population, bs="re"), family = "poisson", data=data_dyn)
m11 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=3)+
s(Population, bs="re"), family = "poisson", data=data_dyn)
m12 <- mgcv::gam(Nb_adults ~ Block +
s(Generation_Eggs, by= Genetic_Diversity, k=4) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m13 <- mgcv::gam(Nb_adults ~ Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m14 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m15 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m16 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=4) +
s(Generation_Eggs, by= Genetic_Diversity, k=4) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
m17 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=3) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
AIC(m1,m2,m3,m4,m5,m6,m7,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17)
## df AIC
## m1 6.994710 15820.24
## m2 9.763989 15822.89
## m3 12.061325 15790.92
## m4 10.994807 13955.58
## m5 11.764852 15759.43
## m6 12.729808 15726.51
## m7 16.175767 13915.73
## m8 76.248665 11298.95
## m9 79.061452 11296.89
## m10 81.484894 11253.32
## m11 75.770122 11298.48
## m12 79.073993 11296.91
## m13 80.625022 11252.52
## m14 80.103661 11252.00
## m15 80.271775 11251.94
## m16 77.938918 11295.61
## m17 75.646077 11298.34
#https://www.seascapemodels.org/rstats/2021/03/27/common-GAM-problems.html
# Real comparison
mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod1 <- mgcv::gam(Nb_adults ~ Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod3 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=3) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod4 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=4) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod5 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod7 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=3) +
s(Generation_Eggs, by= Genetic_Diversity, k=3) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod8 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=4) +
s(Generation_Eggs, by= Genetic_Diversity, k=4) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
mod9 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
AIC(mod1, mod2, mod3, mod4, mod5, mod7, mod8, mod9)
## df AIC
## mod1 80.86938 11252.52
## mod2 73.55589 11621.29
## mod3 75.77012 11298.48
## mod4 78.56136 11296.40
## mod5 80.96276 11252.79
## mod7 75.64608 11298.34
## mod8 77.93892 11295.61
## mod9 80.27178 11251.94
modA <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
modB <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Population, bs="re"),family = "poisson", data=data_dyn)
modC <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, by= Genetic_Diversity, k=5) +
s(Population, bs="re"), family = "poisson", data=data_dyn)
AIC(modA, modB, modC)
## df AIC
## modA 80.27178 11251.94
## modB 73.55589 11621.29
## modC 80.96276 11252.79
visreg::visreg(modA, xvar = "Generation_Eggs",
by = "Genetic_Diversity", data = data_dyn,
method = "REML")

visreg::visreg(modC, xvar = "Generation_Eggs",
by = "Genetic_Diversity", data = data_dyn,
method = "REML")

Checking
overdisperson
mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"), method="ML", family = "poisson", link = log, data=data_dyn)
#Check overdispersion
sqrt(deviance(mod0)/(nobs(mod0)-length(coef(mod0))))
## [1] 5.814783
sigma(mod0)
## [1] 5.814783
summary(mod0)
##
## Family: poisson
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.09849 0.06780 75.195 < 2e-16 ***
## Genetic_DiversityMedium -0.39933 0.07771 -5.138 2.77e-07 ***
## Genetic_DiversityLow -0.48975 0.07084 -6.913 4.74e-12 ***
## BlockBlock4 -0.03083 0.06959 -0.443 0.6578
## BlockBlock5 -0.18652 0.08089 -2.306 0.0211 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 3.833e+00 3.954e+00 3742.51 <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh 3.859e+00 3.970e+00 236.75 <2e-16
## s(Generation_Eggs):Genetic_DiversityMedium 1.414e-04 1.671e-04 0.00 0.998
## s(Generation_Eggs):Genetic_DiversityLow 3.391e+00 3.756e+00 86.79 <2e-16
## s(Population) 6.438e+01 6.600e+01 2593.63 <2e-16
##
## s(Generation_Eggs) ***
## s(Generation_Eggs):Genetic_DiversityHigh ***
## s(Generation_Eggs):Genetic_DiversityMedium
## s(Generation_Eggs):Genetic_DiversityLow ***
## s(Population) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 91/92
## R-sq.(adj) = 0.638 Deviance explained = 68.2%
## -ML = 5739.3 Scale est. = 1 n = 353
# Add random obs effect
data_dyn$obs<-as.factor(1:nrow(data_dyn))
mod0 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"),
method="ML", family = mgcv::nb(), link = log, data=data_dyn)
#Check overdispersion
sqrt(deviance(mod0)/(nobs(mod0)-length(coef(mod0))))
## [1] 1.16428
sigma(mod0)
## [1] 1.16428
summary(mod0)
##
## Family: Negative Binomial(2.957)
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.16902 0.08458 61.114 < 2e-16 ***
## Genetic_DiversityMedium -0.40435 0.09679 -4.178 2.94e-05 ***
## Genetic_DiversityLow -0.51630 0.08833 -5.845 5.06e-09 ***
## BlockBlock4 -0.08511 0.08693 -0.979 0.327538
## BlockBlock5 -0.33631 0.10114 -3.325 0.000884 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 3.371e+00 3.781e+00 164.635 <2e-16
## s(Generation_Eggs):Genetic_DiversityHigh 1.000e+00 1.000e+00 5.508 0.0189
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05 0.000 0.9996
## s(Generation_Eggs):Genetic_DiversityLow 1.000e+00 1.001e+00 0.494 0.4823
## s(Population) 1.988e+01 6.600e+01 30.347 0.0037
##
## s(Generation_Eggs) ***
## s(Generation_Eggs):Genetic_DiversityHigh *
## s(Generation_Eggs):Genetic_DiversityMedium
## s(Generation_Eggs):Genetic_DiversityLow
## s(Population) **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 91/92
## R-sq.(adj) = 0.612 Deviance explained = 51.5%
## -ML = 1974.9 Scale est. = 1 n = 353
Testing for
significance
# Three important methods to determine the significance of predictors:
## -1 Model-comparison procedure (using function compareML).
## -2 Test statistics of the smooth terms as presented in the summary.
## -3 Visualization of the smooth terms (e.g., difference plots and difference surfaces) but useful only with significative interaction https://jacolienvanrij.com/Tutorials/GAMM.html
###### 1- Model comparison
### Test interaction Generation*Genetic Diversity
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML( mod_2,mod_1,suggest.report = TRUE)
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Population, bs = "re")
##
## mod_1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population,
## bs = "re")
##
## Report suggestion: The Chi-Square test on the ML scores indicates that model mod_1 is [not significantly] better than model mod_2 (X2(6.00)=3.003, p>.1).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 mod_2 1977.920 8
## 2 mod_1 1974.918 14 3.003 6.000 0.423
##
## AIC difference: 4.21, model mod_1 has lower AIC.
anova(mod_1, mod_2, test = "LRT")
## Analysis of Deviance Table
##
## Model 1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population,
## bs = "re")
## Model 2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Population, bs = "re")
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 304.92 3891.7
## 2 310.72 3905.0 -5.8082 -13.335 0.03402 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#CompareML is preferred over other functions such as AIC for models that include an AR1 model or random effects (especially nonlinear random smooths using bs='fs'). CompareML also reports the AIC difference, but that value should be treated with care.
#AIC(mod_1, mod_2)
# itsadug::gamtabs(mod_1)
# itsadug::gamtabs(mod_2, type="latex")
# summary(mod_1)
### Test effect Generation
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
mod_3 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML(mod_3, mod_2)
## mod_3: Nb_adults ~ Genetic_Diversity + Block + s(Population, bs = "re")
##
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Population, bs = "re")
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 mod_3 2072.836 6
## 2 mod_2 1977.920 8 94.916 2.000 < 2e-16 ***
##
## AIC difference: 192.23, model mod_2 has lower AIC.
### Test intercept Genetic Diversity
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
mod_3 <- mgcv::gam(Nb_adults ~ Block +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML(mod_3, mod_2)
## mod_3: Nb_adults ~ Block + s(Generation_Eggs, k = 5) + s(Population,
## bs = "re")
##
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Population, bs = "re")
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 mod_3 1993.353 6
## 2 mod_2 1977.920 8 15.432 2.000 1.986e-07 ***
##
## AIC difference: 9.55, model mod_2 has lower AIC.
logLik(mod_3)
## 'log Lik.' -1942.118 (df=45.38753)
logLik(mod_2)#
## 'log Lik.' -1952.505 (df=30.22338)
mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
###### 2- Summary of the model
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity-1 + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)
summary(mod_1)
##
## Family: Negative Binomial(2.957)
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity - 1 + Block + s(Generation_Eggs,
## k = 5) + s(Generation_Eggs, by = Genetic_Diversity, k = 5) +
## s(Population, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Genetic_DiversityHigh 5.16902 0.08458 61.114 < 2e-16 ***
## Genetic_DiversityMedium 4.76467 0.08912 53.466 < 2e-16 ***
## Genetic_DiversityLow 4.65271 0.07734 60.161 < 2e-16 ***
## BlockBlock4 -0.08511 0.08693 -0.979 0.327538
## BlockBlock5 -0.33631 0.10114 -3.325 0.000884 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 3.371e+00 3.781e+00 151.191 < 2e-16
## s(Generation_Eggs):Genetic_DiversityHigh 1.000e+00 1.000e+00 2.677 0.10182
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05 9672.819 < 2e-16
## s(Generation_Eggs):Genetic_DiversityLow 1.000e+00 1.001e+00 0.000 0.99943
## s(Population) 1.988e+01 6.600e+01 30.349 0.00372
##
## s(Generation_Eggs) ***
## s(Generation_Eggs):Genetic_DiversityHigh
## s(Generation_Eggs):Genetic_DiversityMedium ***
## s(Generation_Eggs):Genetic_DiversityLow
## s(Population) **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 91/92
## R-sq.(adj) = 0.612 Deviance explained = 51.5%
## -ML = 1974.9 Scale est. = 1 n = 353
mgcv::summary.gam(mod_1, )
##
## Family: Negative Binomial(2.957)
## Link function: log
##
## Formula:
## Nb_adults ~ Genetic_Diversity - 1 + Block + s(Generation_Eggs,
## k = 5) + s(Generation_Eggs, by = Genetic_Diversity, k = 5) +
## s(Population, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Genetic_DiversityHigh 5.16902 0.08458 61.114 < 2e-16 ***
## Genetic_DiversityMedium 4.76467 0.08912 53.466 < 2e-16 ***
## Genetic_DiversityLow 4.65271 0.07734 60.161 < 2e-16 ***
## BlockBlock4 -0.08511 0.08693 -0.979 0.327538
## BlockBlock5 -0.33631 0.10114 -3.325 0.000884 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 3.371e+00 3.781e+00 151.191 < 2e-16
## s(Generation_Eggs):Genetic_DiversityHigh 1.000e+00 1.000e+00 2.677 0.10182
## s(Generation_Eggs):Genetic_DiversityMedium 3.097e-05 6.149e-05 9672.819 < 2e-16
## s(Generation_Eggs):Genetic_DiversityLow 1.000e+00 1.001e+00 0.000 0.99943
## s(Population) 1.988e+01 6.600e+01 30.349 0.00372
##
## s(Generation_Eggs) ***
## s(Generation_Eggs):Genetic_DiversityHigh
## s(Generation_Eggs):Genetic_DiversityMedium ***
## s(Generation_Eggs):Genetic_DiversityLow
## s(Population) **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 91/92
## R-sq.(adj) = 0.612 Deviance explained = 51.5%
## -ML = 1974.9 Scale est. = 1 n = 353
data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
each=length(seq(2,6,0.01))),
Block = "Block4",
Population = rep(c("High1","Med1","Low1"),
each=length(seq(2,6,0.01))))
data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
exclude_terms = list("s(Population)", "Block"),
values = list(Population = NULL, Block = NULL),
type = "response")
###### Additional tests
# With factors such as Group we cannot use the summary to test differences between groups. However, to investigate differences between groups, we could use difference smooths with ordered factors or binary predictors.
data_dyn$OFGroup_diversity <- as.factor(data_dyn$Genetic_Diversity)
# change factor to ordered factor:
data_dyn$OFGroup_diversity <- as.ordered(data_dyn$OFGroup)
# change contrast to treatment coding (difference curves)
contrasts(data_dyn$OFGroup_diversity) <- 'contr.treatment'
# Inspect contrasts:
contrasts(data_dyn$OFGroup_diversity)
## Medium Low
## High 0 0
## Medium 1 0
## Low 0 1
mod_1 <- mgcv::gam(Nb_adults ~ OFGroup_diversity + Block +
s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = OFGroup_diversity, k=5) +
s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)
summary(mod_1)
##
## Family: Negative Binomial(2.957)
## Link function: log
##
## Formula:
## Nb_adults ~ OFGroup_diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = OFGroup_diversity, k = 5) + s(Population,
## bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.16902 0.08458 61.114 < 2e-16 ***
## OFGroup_diversityMedium -0.40435 0.09679 -4.178 2.94e-05 ***
## OFGroup_diversityLow -0.51630 0.08833 -5.845 5.06e-09 ***
## BlockBlock4 -0.08511 0.08693 -0.979 0.327535
## BlockBlock5 -0.33631 0.10114 -3.325 0.000883 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(Generation_Eggs) 3.371 3.781 138.039 < 2e-16 ***
## s(Generation_Eggs):OFGroup_diversityMedium 1.001 1.002 5.503 0.01900 *
## s(Generation_Eggs):OFGroup_diversityLow 1.000 1.000 3.278 0.07023 .
## s(Population) 19.879 66.000 30.350 0.00379 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.612 Deviance explained = 51.5%
## -ML = 1974.9 Scale est. = 1 n = 353
#We confirm: Interaction significative
# The summary now provides the significance of the difference terms: Medium and High does show a slightly significantly different trend over Time (F=5.50, edf=1, p=0.02), Low and High does not show significantly different trend over Time (F=19.8, edf=1, p=0.07).
###### 2- Visualization of the smooth terms
# seful only with significative interaction
# Which is not the case here
# But see: https://jacolienvanrij.com/Tutorials/GAMM.html
Plot
data_dyn <- data[data$Generation_Eggs>=2&data$Extinction_finalstatus=="no",]
mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), family = mgcv::nb(), data=data_dyn)
#Check transformation: OK
# mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity +
# s(Generation_Eggs, k=5), family = mgcv::nb(), data=data_dyn)
#
# mod_A_p <- tidymv::get_gam_predictions(model = mod_final,
# series = Generation_Eggs,
# transform = exp)
# tapply(mod_A_p$Nb_adults,mod_A_p$Genetic_Diversity,min)
#
#
# data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
# Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
# each=length(seq(2,6,0.01))))
# data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
# type = "response")
# tapply(data_predict_gam$fit,data_predict_gam$Genetic_Diversity,min)
#
#
# data_predict_gam
# Plot block effect
mod_block_P <- tidymv::predict_gam(mod_final,
exclude_terms = list("s(Population)"),
values = list(Population = NULL))
ggplot(data = mod_block_P, aes(Generation_Eggs, fit)) +
facet_wrap(~ Block) +
tidymv::geom_smooth_ci(Genetic_Diversity)

########## EXCLUDING BLOCK EFFECT
mod_final <- mgcv::gam(Nb_adults ~ Genetic_Diversity +
s(Generation_Eggs, k=5) +
s(Population, bs="re"), family = mgcv::nb(), data=data_dyn)
mod_A_p <- tidymv::get_gam_predictions(model = mod_final,
series = Generation_Eggs,
exclude_random = TRUE,
transform = exp)
tidymv::plot_smooths(mod_final, Generation_Eggs, Genetic_Diversity,
exclude_random = TRUE,
transform = exp)

# ## Other functions
# data_predict_gam <- data.frame(Generation_Eggs = rep(seq(2,6, 0.01),3),
# Genetic_Diversity = rep(levels(data_dyn$Genetic_Diversity),
# each=length(seq(2,6,0.01))),
# Block = "Block4",
# Population = rep(c("High1","Med1","Low1"),
# each=length(seq(2,6,0.01))))
# library(mgcv)
# data_predict_gam$fit<- mgcv::predict.gam(mod_final, newdata = data_predict_gam,
# exclude_terms = list("s(Population)", "Block"),
# values = list(Population = NULL, Block = NULL),
# type = "response")
#ggplot(data = data_predict_gam, aes(Generation_Eggs, fit))
# ## PLOT
# plot_GAM_Ucurve <- ggplot(data = mod_A_p, aes(Generation_Eggs, fit)) +
# geom_point(data = data[data$Extinction_finalstatus=="no",],
# aes(x = Generation_Eggs,
# y = Nb_adults,
# group = Genetic_Diversity,
# colour = Genetic_Diversity),
# position = position_dodge2(0.3),
# size = 0.6, alpha = 0.9) +
# tidymv::plot_smooths(mod_final, Generation_Eggs, Genetic_Diversity,
# exclude_random = TRUE,
# transform = exp) +
# tidymv::geom_smooth_ci(Genetic_Diversity,
# size = 1.5, linetype = 1) +
# ylab("Population size") +
# xlab("Generation") +
# scale_color_manual(name="Demographic history",
# breaks = c("High", "Medium", "Low"),
# labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
# values = c("#00AFBB","#FC4E07","#E7B800")) +
# xlim(1.8,6.2) +
# theme_LO +
# theme(panel.grid.major.y = element_blank(),
# panel.grid.minor.y = element_blank())
# plot_GAM_Ucurve
## PLOT
plot_GAM_Ucurve <- ggplot(data = data, aes(Generation_Eggs, Nb_adults)) +
geom_point(data = data[data$Extinction_finalstatus=="no",],
aes(x = Generation_Eggs,
y = Nb_adults,
group = Genetic_Diversity,
colour = Genetic_Diversity),
position = position_dodge2(0.3),
size = 0.6, alpha = 0.9) +
ggplot2::geom_ribbon(data = mod_A_p,
aes(ymin = CI_lower,
ymax = CI_upper,
x = Generation_Eggs,
group = Genetic_Diversity,
fill = Genetic_Diversity,
color =Genetic_Diversity),
alpha=0.2, size=0.05) +
geom_line(data = mod_A_p,
aes(x = Generation_Eggs, y = Nb_adults, color = Genetic_Diversity),
size = 0.8, linetype = 1) +
ylab("Population size") +
xlab("Generation") +
scale_fill_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400")) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400")) +
xlim(1.8,6.2) +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
plot_GAM_Ucurve

#
# cowplot::save_plot(file = here::here("figures","PopulationSize_GAM.pdf"),
# plot_GAM_Ucurve, base_height = 10/cm(1),
# base_width = 13/cm(1), dpi = 610)
#
#To estimate confidence interval at the end of the experiment
#Default on tidymv::geom_smooth_ci is ci_z = 1.96
#population size at the end
data_gam_end <- as.data.frame(mod_A_p[mod_A_p$Generation_Eggs==6,])
#minimum population size
tapply(mod_A_p$Nb_adults,mod_A_p$Genetic_Diversity,min)
## High Medium Low
## 96.00447 65.15853 60.34392
mod_A_p[mod_A_p$Genetic_Diversity=="High"&mod_A_p$Nb_adults<=96.1,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 49 1 High 4.666667 High1 96.00447 0.08171657
## CI_upper CI_lower
## 49 112.6808 81.79616
mod_A_p[mod_A_p$Genetic_Diversity=="Medium"&mod_A_p$Nb_adults<=65.2,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 50 3 Medium 4.666667 High1 65.15853 0.09348447
## CI_upper CI_lower
## 50 78.26126 54.24949
mod_A_p[mod_A_p$Genetic_Diversity=="Low"&mod_A_p$Nb_adults<=60.4,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 51 2 Low 4.666667 High1 60.34392 0.08309636
## CI_upper CI_lower
## 51 71.01769 51.27438
#### Plot with the interaction effect
mod_final_interaction <- mgcv::gam(Nb_adults ~ Genetic_Diversity +
s(Generation_Eggs, k=5) + s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"), family = mgcv::nb(), data=data_dyn)
# Plot interaction
mod_A_interaction <- tidymv::get_gam_predictions(model = mod_final_interaction,
series = Generation_Eggs,
exclude_random = TRUE,
transform = exp)
plot_GAM_Ucurve_interaction <- ggplot(data = data, aes(Generation_Eggs, Nb_adults)) +
geom_point(data = data[data$Extinction_finalstatus=="no",],
aes(x = Generation_Eggs,
y = Nb_adults,
group = Genetic_Diversity,
colour = Genetic_Diversity),
position = position_dodge2(0.3),
size = 0.6, alpha = 0.9) +
ggplot2::geom_ribbon(data = mod_A_interaction,
aes(ymin = CI_lower,
ymax = CI_upper,
x = Generation_Eggs,
group = Genetic_Diversity,
fill = Genetic_Diversity,
color =Genetic_Diversity),
alpha=0.2, size=0.05) +
geom_line(data = mod_A_interaction,
aes(x = Generation_Eggs, y = Nb_adults, color = Genetic_Diversity),
size = 0.8, linetype = 1) +
ylab("Population size") +
xlab("Generation") +
scale_fill_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400")) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FD7C49","#F5C400")) +
xlim(1.8,6.2) +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
plot_GAM_Ucurve_interaction

#minimum population size
mod_A_interaction[mod_A_interaction$Generation_Eggs==6,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 73 1 High 6 High1 143.53899 0.1074390
## 74 3 Medium 6 High1 74.57935 0.1284980
## 75 2 Low 6 High1 76.73883 0.1119476
## CI_upper CI_lower
## 73 177.18373 116.28292
## 74 95.93969 57.97475
## 75 95.56677 61.62026
tapply(mod_A_interaction$Nb_adults,mod_A_interaction$Genetic_Diversity,min)
## High Medium Low
## 99.81774 62.02463 58.36272
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="High"&mod_A_interaction$Nb_adults<=99.82,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 46 1 High 4.5 High1 99.81774 0.08377412
## CI_upper CI_lower
## 46 117.6299 84.7028
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="Medium"&mod_A_interaction$Nb_adults<=62.03,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 53 3 Medium 4.833333 High1 62.02463 0.1025714
## CI_upper CI_lower
## 53 75.83587 50.72869
mod_A_interaction[mod_A_interaction$Genetic_Diversity=="Low"&mod_A_interaction$Nb_adults<=58.37,]
## .idx Genetic_Diversity Generation_Eggs Population Nb_adults SE
## 51 2 Low 4.666667 High1 58.36272 0.08935786
## CI_upper CI_lower
## 51 69.5342 48.98607
legend_gamm <- lemon::g_legend(plot_GAM_Ucurve_interaction)
PLOT_ALL_GAMM <- cowplot::ggdraw() +
cowplot::draw_plot(plot_GAM_Ucurve + theme(legend.position = "none"),
x = 0.01, y = 0, width = 0.4, height = 0.95) +
cowplot::draw_plot(plot_GAM_Ucurve_interaction + theme(legend.position = "none"),
x = 0.4, y = 0, width = 0.4, height = 0.95) +
cowplot::draw_plot(legend_gamm, x = 0.84, y = 0.5, width = 0.1, height = 0.1) +
cowplot::draw_plot_label(c("A",
"B"),
x = c(0.01,0.4),
y = c(1, 1),
hjust = c(-0.25, -0.25),
vjust = c(1.5, 1.5),
size = 17) +
cowplot::draw_plot_label(c("Best-fit model without interaction ",
"Full model with interaction "),
x = c(0.23, 0.63),
y = c(1, 1),
hjust = c(0.5, 0.5),
vjust = c(1.5, 1.5),
size = 12)
PLOT_ALL_GAMM

# cowplot::save_plot(file = here::here("figures","Fig3_Dynamics_GAMM.pdf"),
# PLOT_ALL_GAMM, base_height = 10/cm(1),
# base_width = 22/cm(1), dpi = 610)
Variance over time:
Coefficient of variation
SUM_CoefVar <- Rmisc::summarySE(data[data$Extinction_finalstatus=="no"&data$Generation_Eggs>=2,],
measurevar = c("Nb_adults"),
groupvars = c("Genetic_Diversity","Generation_Eggs"),
na.rm = TRUE)
SUM_CoefVar
## Genetic_Diversity Generation_Eggs N Nb_adults sd se ci
## 1 High 2 27 344.29630 73.77294 14.197610 29.18360
## 2 High 3 27 151.85185 80.96899 15.582489 32.03027
## 3 High 4 26 114.00000 80.20224 15.728954 32.39439
## 4 High 5 27 103.62963 51.56486 9.923661 20.39838
## 5 High 6 27 147.66667 42.60282 8.198916 16.85311
## 6 Medium 2 18 271.27778 92.04526 21.695276 45.77303
## 7 Medium 3 18 135.94444 88.56867 20.875835 44.04416
## 8 Medium 4 18 78.94444 63.91858 15.065754 31.78596
## 9 Medium 5 18 68.27778 48.73434 11.486794 24.23502
## 10 Medium 6 18 73.61111 44.05496 10.383855 21.90802
## 11 Low 2 26 266.11538 74.47809 14.606356 30.08235
## 12 Low 3 26 112.73077 81.93000 16.067795 33.09224
## 13 Low 4 26 60.07692 35.26463 6.915962 14.24369
## 14 Low 5 25 63.72000 43.11179 8.622359 17.79567
## 15 Low 6 26 82.96154 42.28899 8.293553 17.08089
SUM_CoefVar$CoefVar <- (SUM_CoefVar$sd / SUM_CoefVar$Nb_adults) * 100
SUM_CoefVar
## Genetic_Diversity Generation_Eggs N Nb_adults sd se ci
## 1 High 2 27 344.29630 73.77294 14.197610 29.18360
## 2 High 3 27 151.85185 80.96899 15.582489 32.03027
## 3 High 4 26 114.00000 80.20224 15.728954 32.39439
## 4 High 5 27 103.62963 51.56486 9.923661 20.39838
## 5 High 6 27 147.66667 42.60282 8.198916 16.85311
## 6 Medium 2 18 271.27778 92.04526 21.695276 45.77303
## 7 Medium 3 18 135.94444 88.56867 20.875835 44.04416
## 8 Medium 4 18 78.94444 63.91858 15.065754 31.78596
## 9 Medium 5 18 68.27778 48.73434 11.486794 24.23502
## 10 Medium 6 18 73.61111 44.05496 10.383855 21.90802
## 11 Low 2 26 266.11538 74.47809 14.606356 30.08235
## 12 Low 3 26 112.73077 81.93000 16.067795 33.09224
## 13 Low 4 26 60.07692 35.26463 6.915962 14.24369
## 14 Low 5 25 63.72000 43.11179 8.622359 17.79567
## 15 Low 6 26 82.96154 42.28899 8.293553 17.08089
## CoefVar
## 1 21.42717
## 2 53.32104
## 3 70.35285
## 4 49.75880
## 5 28.85067
## 6 33.93026
## 7 65.15063
## 8 80.96653
## 9 71.37658
## 10 59.84825
## 11 27.98714
## 12 72.67758
## 13 58.69912
## 14 67.65818
## 15 50.97421
# library(DescTools)
# DescTools::CoefVar(data[data$Extinction_finalstatus=="no"&
# data$Generation_Eggs==2&
# data$Genetic_Diversity=="Low",]$Nb_adults, conf.level = 0.95)
data_extant <- data[data$Extinction_finalstatus=="no"&
data$Generation_Eggs>=2,]
#
# tapply(data_extant$Nb_adults,
# list(data_extant$Generation_Eggs,
# data_extant$Genetic_Diversity),
# CoefVar, na.rm=TRUE)
SUM_CoefVar_DescTools <- SUM_CoefVar[,1:2]
SUM_CoefVar_DescTools$CoefVar <- NA
SUM_CoefVar_DescTools$CI_lower <- NA
SUM_CoefVar_DescTools$CI_upper <- NA
SUM_CoefVar_DescTools$Mean <- NA
for (i in levels(SUM_CoefVar_DescTools$Genetic_Diversity)) {
for (j in c(2:6)) {
SUM_CoefVar_DescTools$CoefVar[SUM_CoefVar_DescTools$Genetic_Diversity==i&
SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
data_extant$Generation_Eggs==j,]$Nb_adults,
conf.level = 0.95, na.rm=TRUE)[1]
SUM_CoefVar_DescTools$CI_lower[SUM_CoefVar_DescTools$Genetic_Diversity==i&
SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
data_extant$Generation_Eggs==j,]$Nb_adults,
conf.level = 0.95, na.rm=TRUE)[2]
SUM_CoefVar_DescTools$CI_upper[SUM_CoefVar_DescTools$Genetic_Diversity==i&
SUM_CoefVar_DescTools$Generation_Eggs==j] <- DescTools::CoefVar(data_extant[data_extant$Genetic_Diversity==i&
data_extant$Generation_Eggs==j,]$Nb_adults,
conf.level = 0.95, na.rm=TRUE)[3]
SUM_CoefVar_DescTools$Mean[SUM_CoefVar_DescTools$Genetic_Diversity==i&
SUM_CoefVar_DescTools$Generation_Eggs==j] <- mean(data_extant[data_extant$Genetic_Diversity==i&
data_extant$Generation_Eggs==j,]$Nb_adults,
na.rm = TRUE)
}
}
### Plot
ggplot2::ggplot(SUM_CoefVar,
aes(x = Genetic_Diversity,
y = CoefVar,
fill = Genetic_Diversity,
color = Genetic_Diversity,
shape = factor(Generation_Eggs))) +
#geom_boxplot(outlier.color = NA) +
geom_point(size = 4, alpha = 1) +
scale_shape_manual(name="Generation", values = c(21,22,23,24,25)) +
scale_fill_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
ggplot2::scale_x_discrete(labels = c("Diverse","Intermediate bottleneck","Strong bottleneck")) +
guides(color = FALSE, fill = FALSE) +
ylab("Coefficient of variation (%)") +
xlab("Demographic history") +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())

### Plot
plot_CV <- ggplot2::ggplot(SUM_CoefVar,
aes(x = factor(Generation_Eggs),
y = CoefVar,
color = Genetic_Diversity)) +
geom_point(size = 4, alpha = 1) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
ylab("Coefficient of variation (%)") +
xlab("Generation") +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
plot_CV

plot_CV_function <- ggplot2::ggplot(SUM_CoefVar_DescTools,
aes(x = factor(Generation_Eggs),
y = CoefVar,
color = Genetic_Diversity)) +
geom_point(position = position_dodge(0.4),
size = 4, alpha = 1) +
geom_errorbar(aes(x = factor(Generation_Eggs),
ymin = CI_lower,
ymax = CI_upper,
colour = Genetic_Diversity),
position = position_dodge(0.4), size = 0.6, width=0, alpha = 0.8) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
ylab("Coefficient of variation") +
xlab("Generation") +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
plot_CV_function

# cowplot::save_plot(here::here("figures","Fig4_CoefficientVariation.pdf"), plot_CV_function,
# base_height = 12/cm(1), base_width = 18/cm(1), dpi = 610)
#
plot_testCV <- ggplot2::ggplot(SUM_CoefVar_DescTools,
aes(x = Mean,
y = CoefVar,
color = Genetic_Diversity)) +
geom_point(size = 4, alpha = 1) +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07", "#E7B800")) +
theme_LO
plot_testCV

# ## Statistic
# mod0 <- lm(CoefVar ~ Genetic_Diversity, data=SUM_CoefVar)
# mod1 <- lm(CoefVar ~ 1, data=SUM_CoefVar)
# anova(mod0, mod1, test = "Chisq")
#
# mod0 <- lm(CoefVar ~ Genetic_Diversity-1, data=SUM_CoefVar)
# summary(mod0)
# confint(mod0)
Growth rate G6
Distribution
analysis
head(data_phenotyping)
## Population Week Block ID_Rep Divsersity Population.1 Box Initial.N Start
## 1 High1 5-week Block3 52 High 1 1 30 7/8/21
## 2 High1 5-week Block3 53 High 1 2 30 7/8/21
## 3 High13 5-week Block4 129 High 13 1 30 7/15/21
## 4 High13 5-week Block4 130 High 13 2 30 7/15/21
## 5 High13 5-week Block4 131 High 13 3 30 7/15/21
## 6 High14 5-week Block4 132 High 14 1 30 7/15/21
## End Larvae Pupae Adults Lambda Genetic_Diversity Nb_ttx p_adults
## 1 8/12/21 3 9 97 3.2333333 High 109 0.8899083
## 2 8/12/21 1 1 8 0.2666667 High 10 0.8000000
## 3 8/19/21 2 0 84 2.8000000 High 86 0.9767442
## 4 8/19/21 1 2 104 3.4666667 High 107 0.9719626
## 5 8/19/21 2 1 84 2.8000000 High 87 0.9655172
## 6 8/19/21 1 0 83 2.7666667 High 84 0.9880952
## p_pupae p_larvae He_remain He_start He_exp He_end
## 1 0.08256881 0.027522936 0.9986008 0.9986008 0.9679591 0.9666047
## 2 0.10000000 0.100000000 0.9986008 0.9986008 0.9679591 0.9666047
## 3 0.00000000 0.023255814 0.9986008 0.9986008 0.9786327 0.9772634
## 4 0.01869159 0.009345794 0.9986008 0.9986008 0.9786327 0.9772634
## 5 0.01149425 0.022988506 0.9986008 0.9986008 0.9786327 0.9772634
## 6 0.00000000 0.011904762 0.9986008 0.9986008 0.9766045 0.9752381
#Without considering extinct populations
#tapply(data_phenotyping$Lambda,data_phenotyping$Population,length)
hist(data_phenotyping$Lambda, breaks=50)

# #Which distribution
fitdistrplus::descdist(data_phenotyping$Lambda, discrete = FALSE)

## summary statistics
## ------
## min: 0 max: 6.2
## median: 2.1
## mean: 2.213082
## estimated sd: 1.058233
## estimated skewness: 0.5497908
## estimated kurtosis: 3.726017
#############################################################
################## Determine distribution ###################
#############################################################
#Test normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda, "norm")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
## 1-mle-norm
## "not rejected"
plot(f1)

f1$aic
## [1] 551.898
#Test log-normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda+1, "lnorm")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
## 1-mle-lnorm
## "not rejected"
plot(f1)

f1$aic
## [1] 552.5385
#Test exp
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda, "exp")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
## 1-mle-exp
## "rejected"
plot(f1)

f1$aic
## [1] 669.5117
#Test log-normal
f1 <- fitdistrplus::fitdist(data_phenotyping$Lambda+1, "gamma")
g1 <- fitdistrplus::gofstat(f1)
g1$kstest
## 1-mle-gamma
## "not rejected"
plot(f1)

f1$aic
## [1] 544.6686
## Poisson lognormal model
mlog <- lme4::lmer(log(Lambda+1) ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
#summary(mlog)
# Square root
msqrt <- lme4::lmer(sqrt(Lambda) ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
#summary(msqrt)
# Normal
mnorm <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
# # Log Normal
# mlogN <- glm(log(Lambda) ~ Genetic_Diversity + Block,
# family = gaussian, data=data_phenotyping)
## Compare AIC
AIC(mlog, msqrt,mnorm)
## df AIC
## mlog 7 120.8153
## msqrt 7 156.0901
## mnorm 7 528.8983
#Square root a better fits to the data
#But we compare different values: Growth rate and Growth rate+1
# ## Simulate data
x.teo.log <- unlist(simulate(mlog))
x.teo.sqrt <- unlist(simulate(msqrt))
x.teo.norm <- unlist(simulate(mnorm))
# ## QQplot to compared Negative binomial and Poisson log normal distributions
qqplot(data_phenotyping$Lambda, x.teo.log,
main="QQ-plot", xlab="Observed data", ylab="Simulated data",
las=1, xlim = c(0, 10), ylim = c(0, 10), col='blue') ## QQ-plot
points(sort(data_phenotyping$Lambda), sort(x.teo.sqrt), pch=1, col='red')
points(sort(data_phenotyping$Lambda), sort(x.teo.norm), pch=1, col='green')
abline(0,1)
# ## Add legend
legend(0, 7, legend=c("Log", "Sqrt", "Normal"),
col=c("blue", "red", "green"),
pch=1, bty="n")

# ## Normal distribution provides a good fit to the data
# QQ plot
# qqnorm(resid(mlog))
# qqline(resid(mlog))
# qqnorm(resid(msqrt))
# qqline(resid(msqrt))
# qqnorm(resid(mnorm))
# qqline(resid(mnorm)) #best fit
#Distribution Residuals
hist(residuals(mlog),col="yellow",freq=F)

hist(residuals(msqrt),col="yellow",freq=F) #Seems the better fit

hist(residuals(mnorm),col="yellow",freq=F) #Seems the better fit

#Test normality
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(mnorm), "norm"))
g1$kstest
## 1-mle-norm
## "not rejected"
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(msqrt), "norm"))
g1$kstest
## 1-mle-norm
## "not rejected"
g1 <- fitdistrplus::gofstat(fitdistrplus::fitdist(residuals(mnorm), "norm"))
g1$kstest
## 1-mle-norm
## "not rejected"
## Compare AIC
AIC(mlog, msqrt, mnorm)
## df AIC
## mlog 7 120.8153
## msqrt 7 156.0901
## mnorm 7 528.8983
#SLog a better fits to the data
Analysis
#############################################################
################## Analysis ###################
#############################################################
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
summary(mod0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Lambda ~ Genetic_Diversity + Block + (1 | Population)
## Data: data_phenotyping
##
## REML criterion at convergence: 514.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3747 -0.6242 -0.0778 0.5409 3.4451
##
## Random effects:
## Groups Name Variance Std.Dev.
## Population (Intercept) 0.2321 0.4817
## Residual 0.7480 0.8649
## Number of obs: 186, groups: Population, 57
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.569709 0.193717 13.265
## Genetic_DiversityMedium -0.873366 0.238587 -3.661
## Genetic_DiversityLow -0.700898 0.222333 -3.152
## BlockBlock4 0.220484 0.214487 1.028
## BlockBlock5 -0.003695 0.249586 -0.015
##
## Correlation of Fixed Effects:
## (Intr) Gnt_DM Gnt_DL BlckB4
## Gntc_DvrstM -0.460
## Gntc_DvrstL -0.587 0.363
## BlockBlock4 -0.636 0.079 0.175
## BlockBlock5 -0.592 0.089 0.232 0.451
mod1 <- lme4::lmer(Lambda ~ Block + (1|Population),
data=data_phenotyping)
anova(mod0,mod1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## mod1: Lambda ~ Block + (1 | Population)
## mod0: Lambda ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 5 532.53 548.66 -261.26 522.53
## mod0 7 520.90 543.48 -253.45 506.90 15.635 2 0.0004027 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-2*(logLik(mod0)-logLik(mod1))
## 'log Lik.' -12.20285 (df=7)
attr(logLik(mod1), "df")-attr(logLik(mod0), "df")
## [1] -2
lmtest::lrtest(mod1,mod0)
## Likelihood ratio test
##
## Model 1: Lambda ~ Block + (1 | Population)
## Model 2: Lambda ~ Genetic_Diversity + Block + (1 | Population)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -263.55
## 2 7 -257.45 2 12.203 0.00224 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############
###### Posthoc
############
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df lower.CL upper.CL
## High 2.64 0.135 42.2 2.31 2.98
## Medium 1.77 0.198 51.5 1.28 2.26
## Low 1.94 0.177 58.2 1.51 2.38
##
## Results are averaged over the levels of: Block
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df t.ratio p.value
## High - Medium 0.873 0.239 48.0 3.653 0.0018
## High - Low 0.701 0.223 52.5 3.143 0.0076
## Medium - Low -0.172 0.261 53.0 -0.660 0.7873
##
## Results are averaged over the levels of: Block
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
############
###### CONFIDENCE INTERVAL
############
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity-1 + Block + (1|Population),
data=data_phenotyping)
summary(mod0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Lambda ~ Genetic_Diversity - 1 + Block + (1 | Population)
## Data: data_phenotyping
##
## REML criterion at convergence: 514.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3747 -0.6242 -0.0778 0.5409 3.4451
##
## Random effects:
## Groups Name Variance Std.Dev.
## Population (Intercept) 0.2321 0.4817
## Residual 0.7480 0.8649
## Number of obs: 186, groups: Population, 57
##
## Fixed effects:
## Estimate Std. Error t value
## Genetic_DiversityHigh 2.569709 0.193717 13.265
## Genetic_DiversityMedium 1.696343 0.227931 7.442
## Genetic_DiversityLow 1.868811 0.190682 9.801
## BlockBlock4 0.220484 0.214487 1.028
## BlockBlock5 -0.003695 0.249586 -0.015
##
## Correlation of Fixed Effects:
## Gnt_DH Gnt_DM Gnt_DL BlckB4
## Gntc_DvrstM 0.369
## Gntc_DvrstL 0.331 0.235
## BlockBlock4 -0.636 -0.457 -0.441
## BlockBlock5 -0.592 -0.410 -0.331 0.451
confint(mod0)
## 2.5 % 97.5 %
## .sig01 0.1878335 0.6406211
## .sigma 0.7698401 0.9855240
## Genetic_DiversityHigh 2.2034040 2.9372360
## Genetic_DiversityMedium 1.2630197 2.1273737
## Genetic_DiversityLow 1.5040561 2.2293763
## BlockBlock4 -0.1881822 0.6259567
## BlockBlock5 -0.4815724 0.4683096
############
###### PREDICT
############
data_predict_extinct <- data.frame(Genetic_Diversity = levels(data_phenotyping$Genetic_Diversity),
Block = "Block4")
data_predict_extinct$predict <- predict(mod0,
type = "response",
re.form = NA,
newdata = data_predict_extinct)
SUM_GrowthRate <- Rmisc::summarySE(data_phenotyping,
measurevar = c("Lambda"),
groupvars = c("Genetic_Diversity","Week"),
na.rm = TRUE)
Remaining He
difference
Rmisc::summarySE(data[data$Generation_Eggs==2,],
measurevar = c("He_remain"),
groupvars = c("Genetic_Diversity"),
na.rm=TRUE)
## Genetic_Diversity N He_remain sd se ci
## 1 High 27 0.9918493 0.001771315 0.0003408897 0.0007007089
## 2 Medium 23 0.6743040 0.006431259 0.0013410101 0.0027810847
## 3 Low 34 0.5404225 0.012690807 0.0021764555 0.0044280320
mfull <- lm(He_remain ~ Genetic_Diversity, data=data[data$Generation_Eggs==2,])
mless <- lm(He_remain ~ 1, data=data[data$Generation_Eggs==2,])
anova(mfull,mless) #Keep effect
## Analysis of Variance Table
##
## Model 1: He_remain ~ Genetic_Diversity
## Model 2: He_remain ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 81 0.00631
## 2 83 3.14572 -2 -3.1394 20162 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############
###### Posthoc
############
emmeans::emmeans(mfull, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df lower.CL upper.CL
## High 0.9918 0.001698 81 0.9877 0.9960
## Medium 0.6743 0.001840 81 0.6698 0.6788
## Low 0.5404 0.001513 81 0.5367 0.5441
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df t.ratio p.value
## High - Medium 0.318 0.00250 81 126.829 <.0001
## High - Low 0.451 0.00227 81 198.470 <.0001
## Medium - Low 0.134 0.00238 81 56.200 <.0001
##
## P value adjustment: tukey method for comparing a family of 3 estimates
Rmisc::summarySE(data[data$Generation_Eggs==2,],
measurevar = c("He_remain"),
groupvars = c("Genetic_Diversity"),
na.rm=TRUE)
## Genetic_Diversity N He_remain sd se ci
## 1 High 27 0.9918493 0.001771315 0.0003408897 0.0007007089
## 2 Medium 23 0.6743040 0.006431259 0.0013410101 0.0027810847
## 3 Low 34 0.5404225 0.012690807 0.0021764555 0.0044280320
#Calcul for end:
Rmisc::summarySE(data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",],
measurevar = c("He_end"),
groupvars = c("Genetic_Diversity"),
na.rm=TRUE)
## Genetic_Diversity N He_end sd se ci
## 1 High 27 0.9663114 0.01931641 0.003717444 0.007641317
## 2 Medium 18 0.6378048 0.03493729 0.008234799 0.017373906
## 3 Low 26 0.5090656 0.03443416 0.006753094 0.013908257
mfull <- lm(He_end ~ Genetic_Diversity, data=data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",])
mless <- lm(He_end ~ 1, data=data[data$Generation_Eggs==2&data$Extinction_finalstatus!="yes",])
anova(mfull,mless) #Keep effect
## Analysis of Variance Table
##
## Model 1: He_end ~ Genetic_Diversity
## Model 2: He_end ~ 1
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 0.06009
## 2 70 2.97522 -2 -2.9151 1649.3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############
###### Posthoc
############
emmeans::emmeans(mfull, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df lower.CL upper.CL
## High 0.966 0.00572 68 0.952 0.980
## Medium 0.638 0.00701 68 0.621 0.655
## Low 0.509 0.00583 68 0.495 0.523
##
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df t.ratio p.value
## High - Medium 0.329 0.00905 68 36.316 <.0001
## High - Low 0.457 0.00817 68 55.978 <.0001
## Medium - Low 0.129 0.00912 68 14.124 <.0001
##
## P value adjustment: tukey method for comparing a family of 3 estimates
Growth rate and
He
#Best model with genetic diversity
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod1 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod2 <- lme4::lmer(Lambda ~ Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod3 <- lme4::lmer(Lambda ~ log(He_end) + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod5 <- lme4::lmer(Lambda ~ exp(He_end) + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
MuMIn::model.sel(mod0, mod1,mod2,mod3,mod4,mod5)
## Model selection table
## (Int) Blc Gnt_Dvr He_end log(He_end) exp(He_end) family df
## mod1 0.8154 + 1.766 gaussian(identity) 6
## mod5 0.3445 + 0.8311 gaussian(identity) 6
## mod3 2.5470 + 1.259 gaussian(identity) 6
## mod0 2.5700 + + gaussian(identity) 7
## mod2 2.0770 + gaussian(identity) 5
## mod4 2.0770 + gaussian(identity) 5
## logLik AICc delta weight
## mod1 -257.506 527.5 0.00 0.408
## mod5 -258.097 528.7 1.18 0.226
## mod3 -258.156 528.8 1.30 0.213
## mod0 -257.449 529.5 2.05 0.147
## mod2 -263.551 537.4 9.95 0.003
## mod4 -263.551 537.4 9.95 0.003
## Models ranked by AICc(x)
## Random terms (all models):
## '1 | Population'
#Makes sense: best model log He
x <- seq(1:100)
y <- seq(1001:1100)
plot(log(x),y)

#############################################################
################## Analysis ###################
#############################################################
mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
anova(mod3,mod4,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod4: Lambda ~ 1 + Block + (1 | Population)
## mod3: Lambda ~ He_end + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod4 5 532.53 548.66 -261.26 522.53
## mod3 6 521.90 541.26 -254.95 509.90 12.627 1 0.0003802 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logLik(mod1)
## 'log Lik.' -257.5057 (df=6)
lmtest::lrtest(mod4,mod3)
## Likelihood ratio test
##
## Model 1: Lambda ~ 1 + Block + (1 | Population)
## Model 2: Lambda ~ He_end + Block + (1 | Population)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -263.55
## 2 6 -257.51 1 12.09 0.000507 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#############################################################
################## Predict ###################
#############################################################
#http://rstudio-pubs-static.s3.amazonaws.com/7024_e3a68a9b35454e74abfe15b621c50502.html
mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
SUM_MOD <- summary(mod3)
stats::confint(mod3)
## 2.5 % 97.5 %
## .sig01 0.23159556 0.6685194
## .sigma 0.76963964 0.9848593
## (Intercept) 0.09849481 1.5283487
## He_end 0.85203146 2.6836107
## BlockBlock4 -0.19603632 0.6401347
## BlockBlock5 -0.48746506 0.4823031
#confint(mod3, method="Wald")
(estimate_He <- SUM_MOD$coefficients["He_end","Estimate"])
## [1] 1.76556
(IC95_lower <- stats::confint(mod3)["He_end","2.5 %"])
## [1] 0.8520315
(IC95_upper <- stats::confint(mod3)["He_end","97.5 %"])
## [1] 2.683611
#PREDICT
data_predict_lambda <- data.frame(He_end =
seq(min(data_phenotyping[!is.na(data_phenotyping$He_end),]$He_end-0.02),
max(data_phenotyping[!is.na(data_phenotyping$He_end),]$He_end+0.02),
0.01),
Block = "Block4")
data_predict_lambda$lambda_predict <- predict(mod3, type = "response",
re.form = NA,
newdata = data_predict_lambda)
#Confidence interval
bb <- lme4::bootMer(mod3,
FUN=function(x)predict(x,
newdata = data_predict_lambda,
re.form=NA),
nsim=1000)
# Take quantiles of predictions from bootstrap replicates.
# These represent the confidence interval of the mean at any value of Time.
data_predict_lambda$CI_lower <- apply(bb$t, 2, quantile, 0.025)
data_predict_lambda$CI_upper <- apply(bb$t, 2, quantile, 0.975)
#
# cbind(data_predict_lambda,predict(mod3, newdata = data_predict_lambda,
# interval = "confidence", level = 0.95))
# cbind(data_predict_lambda, predict(mod3, newdata = data_predict_lambda, re.form = NA,
# interval = "confidence", level = 0.95))
#
### Plot
# Remove pseudoreplication: average of each population
SUM_POP_He_Mean<- Rmisc::summarySE(data_phenotyping,
measurevar = c("Lambda"),
groupvars = c("Genetic_Diversity",
"He_end",
"Population"),
na.rm=TRUE)
PLOT_He_expect <- ggplot2::ggplot(SUM_POP_He_Mean) +
ggplot2::geom_ribbon(data = data_predict_lambda,
aes(ymin = CI_lower,
ymax = CI_upper,
x = He_end),
fill="grey80", alpha=0.3) +
geom_line(data = data_predict_lambda,
aes(x = He_end, y = lambda_predict),
col = "grey40", size = 1.5, linetype = 1) +
geom_errorbar(data = SUM_POP_He_Mean, aes(x = He_end,
ymin = Lambda-se, ymax = Lambda+se,
colour = Genetic_Diversity),
size = 0.6, width=0, alpha = 0.6) +
geom_point(data = SUM_POP_He_Mean,
aes(x = He_end, y = Lambda,
colour = Genetic_Diversity),
size = 3, alpha = 0.9) +
ylab("Growth rate") +
xlab("Expected heterozygosity") +
scale_color_manual(name="Demographic history",
breaks = c("High", "Medium", "Low"),
labels = c("Diverse","Intermediate bottleneck","Strong bottleneck"),
values = c("#00AFBB","#FC4E07","#E7B800")) +
theme_LO +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank())
PLOT_He_expect

# cowplot::save_plot(here::here("figures","Fig5_GrowthRate_He.pdf"), PLOT_He_expect,
# base_height = 12/cm(1), base_width = 18/cm(1), dpi = 610)
Adults G6
m0 <- lme4::glmer(Adults ~ Genetic_Diversity + (1|Population) + (1|ID_Rep:Population),
family = "poisson", data = data_phenotyping)
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
## Data: data_phenotyping
##
## AIC BIC logLik deviance df.resid
## 1818.1 1834.3 -904.1 1808.1 181
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.23572 -0.14532 0.02672 0.14825 0.41394
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID_Rep:Population (Intercept) 0.1739 0.4170
## Population (Intercept) 0.1085 0.3294
## Number of obs: 186, groups: ID_Rep:Population, 186; Population, 57
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.2943 0.0812 52.884 < 2e-16 ***
## Genetic_DiversityMedium -0.5132 0.1438 -3.569 0.000358 ***
## Genetic_DiversityLow -0.3047 0.1295 -2.352 0.018660 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Gnt_DM
## Gntc_DvrstM -0.563
## Gntc_DvrstL -0.625 0.356
# Equivalent to:
# m0 <- lme4::glmer(Adults ~ Genetic_Diversity + (1|Population/ID_Rep),
# family = "poisson", data = data_5week)
#dispersion_lme4::glmer(m0) #dispersion ok
#Note: (1|School) + (1|Class:School) is equivalent to (1|School/Class)
m0 <- lme4::glmer(Adults ~ Genetic_Diversity + (1|Population) + (1|ID_Rep:Population),
family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1 + (1|Population) + (1|ID_Rep:Population),
family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Population) + (1 | ID_Rep:Population)
## m0: Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 3 1826.3 1836.0 -910.15 1820.3
## m0 5 1818.1 1834.3 -904.07 1808.1 12.164 2 0.002284 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## Adults ~ Genetic_Diversity + (1 | Population) + (1 | ID_Rep:Population)
## Data: data_phenotyping
##
## AIC BIC logLik deviance df.resid
## 1818.1 1834.3 -904.1 1808.1 181
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.23572 -0.14532 0.02672 0.14825 0.41394
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID_Rep:Population (Intercept) 0.1739 0.4170
## Population (Intercept) 0.1085 0.3294
## Number of obs: 186, groups: ID_Rep:Population, 186; Population, 57
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.2943 0.0812 52.884 < 2e-16 ***
## Genetic_DiversityMedium -0.5132 0.1438 -3.569 0.000358 ***
## Genetic_DiversityLow -0.3047 0.1295 -2.352 0.018660 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Gnt_DM
## Gntc_DvrstM -0.563
## Gntc_DvrstL -0.625 0.356
############
###### Posthoc
############
emmeans::emmeans(m0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df asymp.LCL asymp.UCL
## High 4.29 0.0812 Inf 4.10 4.49
## Medium 3.78 0.1188 Inf 3.50 4.06
## Low 3.99 0.1011 Inf 3.75 4.23
##
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df z.ratio p.value
## High - Medium 0.513 0.144 Inf 3.569 0.0010
## High - Low 0.305 0.130 Inf 2.352 0.0489
## Medium - Low -0.208 0.156 Inf -1.340 0.3728
##
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
#test double nested
m0 <- lme4::glmer(Adults ~ Genetic_Diversity + (1|Genetic_Diversity/Population/ID_Rep),
family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1 + (1|Genetic_Diversity/Population/ID_Rep),
family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq")
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Genetic_Diversity/Population/ID_Rep)
## m0: Adults ~ Genetic_Diversity + (1 | Genetic_Diversity/Population/ID_Rep)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 4 1823.6 1836.5 -907.81 1815.6
## m0 6 1820.1 1839.5 -904.07 1808.1 7.4783 2 0.02377 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
m0 <- lme4::glmer(Adults ~ Genetic_Diversity + (1|Genetic_Diversity/ID_Rep),
family = "poisson", data = data_phenotyping)
m1 <- lme4::glmer(Adults ~ 1 + (1|Genetic_Diversity/ID_Rep),
family = "poisson", data = data_phenotyping)
anova(m0,m1,test="Chisq")
## Data: data_phenotyping
## Models:
## m1: Adults ~ 1 + (1 | Genetic_Diversity/ID_Rep)
## m0: Adults ~ Genetic_Diversity + (1 | Genetic_Diversity/ID_Rep)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 3 1844.6 1854.3 -919.30 1838.6
## m0 5 1838.5 1854.6 -914.26 1828.5 10.086 2 0.006455 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Adults G2
m0 <- glm(Nb_adults ~ Genetic_Diversity + Block, family = "poisson", data = data[data$Generation_Eggs==2,])
summary(m0)
##
## Call:
## glm(formula = Nb_adults ~ Genetic_Diversity + Block, family = "poisson",
## data = data[data$Generation_Eggs == 2, ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.7057 -2.7621 0.2126 3.3983 11.4454
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.73890 0.01497 383.312 < 2e-16 ***
## Genetic_DiversityMedium -0.19408 0.01628 -11.920 < 2e-16 ***
## Genetic_DiversityLow -0.19278 0.01460 -13.205 < 2e-16 ***
## BlockBlock4 0.10767 0.01579 6.817 9.3e-12 ***
## BlockBlock5 0.17743 0.01600 11.088 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2379.6 on 83 degrees of freedom
## Residual deviance: 2027.9 on 79 degrees of freedom
## AIC: 2667.2
##
## Number of Fisher Scoring iterations: 4
m1<- glm(Nb_adults ~ Block, family = "poisson", data = data[data$Generation_Eggs==2,])
anova(m0,m1,test="Chisq") # difference
## Analysis of Deviance Table
##
## Model 1: Nb_adults ~ Genetic_Diversity + Block
## Model 2: Nb_adults ~ Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 79 2027.9
## 2 81 2241.4 -2 -213.52 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############
###### Posthoc
############
emmeans::emmeans(m0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df asymp.LCL asymp.UCL
## High 5.834 0.01051 Inf 5.809 5.859
## Medium 5.640 0.01243 Inf 5.610 5.670
## Low 5.641 0.01022 Inf 5.617 5.666
##
## Results are averaged over the levels of: Block
## Results are given on the log (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df z.ratio p.value
## High - Medium 0.1941 0.0163 Inf 11.920 <.0001
## High - Low 0.1928 0.0146 Inf 13.205 <.0001
## Medium - Low -0.0013 0.0161 Inf -0.081 0.9964
##
## Results are averaged over the levels of: Block
## Results are given on the log (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
Proportion life
stage
##### P_adults
mod0 <- lme4::glmer(p_adults ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_adults ~ 1 + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping_4week
## Models:
## mod1: p_adults ~ 1 + Block + (1 | Population)
## mod0: p_adults ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 723.41 733.18 -357.70 715.41
## mod0 6 722.93 737.58 -355.46 710.93 4.4827 2 0.1063
rm(mod0,mod1)
mod0 <- lme4::glmer(p_adults ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_adults ~ 1 + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping
## Models:
## mod1: p_adults ~ 1 + Block + (1 | Population)
## mod0: p_adults ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 893.45 906.31 -442.73 885.45
## mod0 6 896.17 915.46 -442.08 884.17 1.284 2 0.5262
rm(mod0,mod1)
##### P_adults
mod0 <- lme4::glmer(p_pupae ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_pupae ~ 1 + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping_4week
## Models:
## mod1: p_pupae ~ 1 + Block + (1 | Population)
## mod0: p_pupae ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 692.89 702.67 -342.45 684.89
## mod0 6 692.61 707.26 -340.30 680.61 4.2861 2 0.1173
rm(mod0,mod1)
mod0 <- lme4::glmer(p_pupae ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_pupae ~ 1 + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference
## Data: data_phenotyping
## Models:
## mod1: p_pupae ~ 1 + Block + (1 | Population)
## mod0: p_pupae ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 539.89 552.75 -265.95 531.89
## mod0 6 542.26 561.54 -265.13 530.26 1.639 2 0.4407
rm(mod0,mod1)
##### P_larvae
mod0 <- lme4::glmer(p_larvae ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_larvae ~ 1 + Block + (1|Population),
data = data_phenotyping_4week,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference and convergence issue
## Data: data_phenotyping_4week
## Models:
## mod1: p_larvae ~ 1 + Block + (1 | Population)
## mod0: p_larvae ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 441.93 451.71 -216.97 433.93
## mod0 6 445.08 459.74 -216.54 433.08 0.853 2 0.6528
mod0 <- lme4::glmer(p_larvae ~ Genetic_Diversity + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
mod1 <- lme4::glmer(p_larvae ~ 1 + Block + (1|Population),
data = data_phenotyping,family = binomial(link = "logit"),
weights = Nb_ttx)
anova(mod0,mod1,test="Chisq") # NO difference and convergence issue
## Data: data_phenotyping
## Models:
## mod1: p_larvae ~ 1 + Block + (1 | Population)
## mod0: p_larvae ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 4 737.53 750.39 -364.77 729.53
## mod0 6 740.62 759.91 -364.31 728.62 0.9082 2 0.635
Multinomial life
stage
# data(alligator)
# head(alligator)
# fit <- multinom(food ~ lake+size+sex, data=alligator, weights=count)
# summary(fit$fitted.values)
data_TEMP <- data_phenotyping_all[,c(1:5,11:13,23)]
data_multinom <- tidyr::gather(data_TEMP,"stage","count",6:8)
data_multinom$Divsersity <- as.factor(data_multinom$Divsersity )
head(data_multinom)
## Population Week Block ID_Rep Divsersity He_end stage count
## 1 High1 5-week Block3 52 High 0.9666047 Larvae 3
## 2 High1 5-week Block3 53 High 0.9666047 Larvae 1
## 3 High13 5-week Block4 129 High 0.9772634 Larvae 2
## 4 High13 5-week Block4 130 High 0.9772634 Larvae 1
## 5 High13 5-week Block4 131 High 0.9772634 Larvae 2
## 6 High14 5-week Block4 132 High 0.9752381 Larvae 1
#
# fit <- nnet::multinom(stage ~ Block + Divsersity + Week +
# ID_Rep , data=data_multinom, weights=count)
# summary(fit$fitted.values)
# fit
#
# fit0 <- nnet::multinom(stage ~ Block + Divsersity +
# ID_Rep , data=data_multinom, weights=count)
# anova(fit,fit0)
#
# fit1 <- nnet::multinom(stage ~ Block + Week + ID_Rep, data=data_multinom, weights=count)
# anova(fit1,fit)
#
#
# ### New function to test random effect MCLOGIT
# # data_TEMP <- data_phenotyping_all[,c(1:5,17:19,23)]
# # data_multinom <- tidyr::gather(data_TEMP,"stage","prop",6:8)
# # data_multinom$Divsersity <- as.factor(data_multinom$Divsersity )
# # head(data_multinom)
#
#
# #Other example
# library("AER")
# library("mlogit")
#### 1- Dataset 1: Train transport
# data("TravelMode", package="AER")
# head(TravelMode)
# #Here: Each individual can choose across 4 modes of transports: air train bus or car
# #We can determine if this choice is random
# #or determined by different variables as: vcost, travel, etc.
#
# TM <- mlogit::mlogit.data(data = TravelMode, #dataset
# choice = "choice", #Variable containing the choice: yes or no
# shape = "long", #type of dataset
# alt.levels = "mode") #Variable contianing the list of choice
#
# fit0 <- mlogit::mlogit.data(data=data_multinom,
# choice = "count",
# alt.levels = "stage",
# shape = "long")
#
#
#### 2- Dataset 2: Fishing price
#https://rdrr.io/cran/mlogit/man/mlogit.html
# data("Fishing", package = "mlogit")
# head(Fishing)
# #There are:
#two alternative specific variables : price and catch one individual
#specific variable (income)
#four fishing mode : beach, pier, boat, charter
#A dataframe containing :
#mode: recreation mode choice, one of : beach, pier, boat and charter,
#price.beach: price for beach mode
#price.pier: price for pier mode,
#price.boat: price for private boat mode,
#price.charter: price for charter boat mode,
#catch.beach: catch rate for beach mode,
#catch.pier: catch rate for pier mode,
#catch.boat: catch rate for private boat mode,
#catch.charter: catch rate for charter boat mode,
#income: monthly income,
#### Discussion with Ann Hess
## Response is stage
## Weight or group with number
## As Random effect: ID_Box (6 values)
## As Random effect: Population (one to 6 replicates per population)
## As fixed effect: diversity x Time
data_multinom$stage <- as.factor(data_multinom$stage)
data_multinom$stage <- factor(data_multinom$stage, levels = c("Larvae", "Pupae", "Adults"))
# fit <- nnet::multinom(stage ~ Block + Divsersity + Week +
# ID_Rep , data=data_multinom, weights=count)
# mod0 <- ordinal::clmm(stage ~ Block + Divsersity * Week +
# (1|ID_Rep) + (1|Population),
# data=data_multinom, weights=count)
mod1 <- ordinal::clmm(stage ~ Block + Divsersity + Week +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
mod2 <- ordinal::clmm(stage ~ Block + Divsersity +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
mod3 <- ordinal::clmm(stage ~ Block + Week +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
#anova(mod1,mod0) #sign
anova(mod1,mod2) #sign
## Likelihood ratio tests of cumulative link models:
##
## formula: link:
## mod2 stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population) logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
## threshold:
## mod2 flexible
## mod1 flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## mod2 8 16875 -8429.7
## mod1 9 14842 -7412.2 2035.1 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod1,mod3) # non sign
## Likelihood ratio tests of cumulative link models:
##
## formula: link:
## mod3 stage ~ Block + Week + (1 | ID_Rep) + (1 | Population) logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
## threshold:
## mod3 flexible
## mod1 flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## mod3 7 14838 -7412.2
## mod1 9 14842 -7412.2 0.127 2 0.9385
-2*(logLik(mod2)-logLik(mod1))
## 'log Lik.' 2035.075 (df=8)
lmtest::lrtest(mod2,mod1)
## Likelihood ratio test
##
## Model 1: stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population)
## Model 2: stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 8 -8429.7
## 2 9 -7412.2 1 2035.1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lmtest::lrtest(mod3,mod1)
## Likelihood ratio test
##
## Model 1: stage ~ Block + Week + (1 | ID_Rep) + (1 | Population)
## Model 2: stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 7 -7412.2
## 2 9 -7412.2 2 0.127 0.9385
mod3 <- ordinal::clmm(stage ~ Week + Block +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
mod3$coefficients
## Larvae|Pupae Pupae|Adults Week5-week BlockBlock4 BlockBlock5
## -2.6968695 -0.8006353 2.6176503 0.1047649 0.1707286
confint(mod3)
## 2.5 % 97.5 %
## Larvae|Pupae -3.0357006 -2.3580384
## Pupae|Adults -1.1328356 -0.4684350
## Week5-week 2.4833263 2.7519744
## BlockBlock4 -0.3443890 0.5539188
## BlockBlock5 -0.3816695 0.7231267
sigma(mod3)
## numeric(0)
summary(mod3)
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: stage ~ Week + Block + (1 | ID_Rep) + (1 | Population)
## data: data_multinom
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 19277 -7412.25 14838.49 423(2995) 3.24e-03 1.8e+02
##
## Random effects:
## Groups Name Variance Std.Dev.
## ID_Rep (Intercept) 0.5185 0.7200
## Population (Intercept) 0.3401 0.5832
## Number of groups: ID_Rep 189, Population 62
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## Week5-week 2.61765 0.06853 38.195 <2e-16 ***
## BlockBlock4 0.10476 0.22916 0.457 0.648
## BlockBlock5 0.17073 0.28184 0.606 0.545
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## Larvae|Pupae -2.6969 0.1729 -15.600
## Pupae|Adults -0.8006 0.1695 -4.724
## (261 observations deleted due to missingness)
SUMMARY ALL
ANALYSIS
######### Proba of extinction
mod0 <- glm(y ~ Genetic_Diversity + Block,
data = data_proba_extinction, family = binomial)
mod1 <- glm(y ~ Block ,
data = data_proba_extinction, family = binomial)
anova(mod1, mod0, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: y ~ Block
## Model 2: y ~ Genetic_Diversity + Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 81 58.903
## 2 79 46.833 2 12.069 0.002394 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df asymp.LCL asymp.UCL
## High -20.07 1855.743 Inf -4451.10 4410.961
## Medium -1.77 0.650 Inf -3.32 -0.216
## Low -1.56 0.532 Inf -2.83 -0.290
##
## Results are averaged over the levels of: Block
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df z.ratio p.value
## High - Medium -18.300 1855.743 Inf -0.010 0.9999
## High - Low -18.506 1855.743 Inf -0.010 0.9999
## Medium - Low -0.206 0.751 Inf -0.274 0.9594
##
## Results are averaged over the levels of: Block
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 3 estimates
#########Time of extinction
mod1 <- glm(Generation_Eggs ~ Genetic_Diversity + Block,
data = data_timeextinction, family = "poisson")
mod0 <- glm(Generation_Eggs ~ Block,
data = data_timeextinction, family = "poisson")
anova(mod0, mod1, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: Generation_Eggs ~ Block
## Model 2: Generation_Eggs ~ Genetic_Diversity + Block
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 10 0.95459
## 2 9 0.74888 1 0.20571 0.6502
#########Dynamic over time
# Test effect
mod_1 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Generation_Eggs, by = Genetic_Diversity, k=5) +
s(Population, bs="re"), method="ML", family = mgcv::nb(), data=data_dyn)
mod_2 <- mgcv::gam(Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k=5) +
s(Population, bs="re"), data=data_dyn, family = mgcv::nb(), method='ML')
itsadug::compareML(mod_1, mod_2)
## mod_1: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Generation_Eggs, by = Genetic_Diversity, k = 5) + s(Population,
## bs = "re")
##
## mod_2: Nb_adults ~ Genetic_Diversity + Block + s(Generation_Eggs, k = 5) +
## s(Population, bs = "re")
##
## Chi-square test of ML scores
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 mod_2 1977.920 8
## 2 mod_1 1974.918 14 3.003 6.000 0.423
##
## AIC difference: -4.21, model mod_1 has lower AIC.
#########Growth rate
mod0 <- lme4::lmer(Lambda ~ Genetic_Diversity + Block + (1|Population),
data=data_phenotyping)
mod1 <- lme4::lmer(Lambda ~ Block + (1|Population),
data=data_phenotyping)
anova(mod0,mod1,test="Chisq") # difference
## Data: data_phenotyping
## Models:
## mod1: Lambda ~ Block + (1 | Population)
## mod0: Lambda ~ Genetic_Diversity + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod1 5 532.53 548.66 -261.26 522.53
## mod0 7 520.90 543.48 -253.45 506.90 15.635 2 0.0004027 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans::emmeans(mod0, list(pairwise ~ Genetic_Diversity), adjust = "tukey")
## $`emmeans of Genetic_Diversity`
## Genetic_Diversity emmean SE df lower.CL upper.CL
## High 2.64 0.135 42.2 2.31 2.98
## Medium 1.77 0.198 51.5 1.28 2.26
## Low 1.94 0.177 58.2 1.51 2.38
##
## Results are averaged over the levels of: Block
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
## Conf-level adjustment: sidak method for 3 estimates
##
## $`pairwise differences of Genetic_Diversity`
## contrast estimate SE df t.ratio p.value
## High - Medium 0.873 0.239 48.0 3.653 0.0018
## High - Low 0.701 0.223 52.5 3.143 0.0076
## Medium - Low -0.172 0.261 53.0 -0.660 0.7873
##
## Results are averaged over the levels of: Block
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
#########Growth rate vs. He
mod3 <- lme4::lmer(Lambda ~ He_end + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
mod4 <- lme4::lmer(Lambda ~ 1 + Block + (1|Population),
data=data_phenotyping[!is.na(data_phenotyping$He_end),])
anova(mod3,mod4,test="Chisq") # difference
## Data: data_phenotyping[!is.na(data_phenotyping$He_end), ]
## Models:
## mod4: Lambda ~ 1 + Block + (1 | Population)
## mod3: Lambda ~ He_end + Block + (1 | Population)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## mod4 5 532.53 548.66 -261.26 522.53
## mod3 6 521.90 541.26 -254.95 509.90 12.627 1 0.0003802 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
########Life stage
mod1 <- ordinal::clmm(stage ~ Block + Divsersity + Week +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
mod2 <- ordinal::clmm(stage ~ Block + Divsersity +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
mod3 <- ordinal::clmm(stage ~ Block + Week +
(1|ID_Rep) + (1|Population),
data=data_multinom, weights=count)
anova(mod1,mod2) #sign
## Likelihood ratio tests of cumulative link models:
##
## formula: link:
## mod2 stage ~ Block + Divsersity + (1 | ID_Rep) + (1 | Population) logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
## threshold:
## mod2 flexible
## mod1 flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## mod2 8 16875 -8429.7
## mod1 9 14842 -7412.2 2035.1 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod1,mod3) # non sign
## Likelihood ratio tests of cumulative link models:
##
## formula: link:
## mod3 stage ~ Block + Week + (1 | ID_Rep) + (1 | Population) logit
## mod1 stage ~ Block + Divsersity + Week + (1 | ID_Rep) + (1 | Population) logit
## threshold:
## mod3 flexible
## mod1 flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## mod3 7 14838 -7412.2
## mod1 9 14842 -7412.2 0.127 2 0.9385